From b7eebb87b33ea4af3efe689c5a0469df4a134313 Mon Sep 17 00:00:00 2001 From: defiQUG Date: Mon, 13 Apr 2026 21:41:14 -0700 Subject: [PATCH] Add Sankofa consolidated hub operator tooling --- .../validate-sankofa-nginx-examples.yml | 25 +++ AGENTS.md | 2 + .../sankofa-consolidated-runtime.example.yml | 15 ++ config/ip-addresses.conf | 8 + .../sankofa-phoenix-7800-guard-dport-4000.nft | 12 ++ .../nginx/sankofa-api-hub-main.example.conf | 19 ++ config/nginx/sankofa-hub-main.example.conf | 23 +++ .../sankofa-non-chain-frontends.example.conf | 27 +++ .../sankofa-phoenix-api-hub.example.conf | 65 ++++++ ...fa-non-chain-web-hub-nginx.service.example | 14 ++ ...kofa-phoenix-api-hub-nginx.service.example | 16 ++ ...CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md | 96 +++++++++ ...ON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md | 166 +++++++++++++++ ...A_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md | 158 ++++++++++++++ docs/02-architecture/SERVICE_DESCRIPTIONS.md | 14 +- ...UB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md | 92 +++++++++ ...01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md | 96 +++++++++ docs/04-configuration/ALL_VMIDS_ENDPOINTS.md | 13 +- docs/MASTER_INDEX.md | 4 +- package.json | 2 + .../ensure-dbis-api-trust-proxy-on-ct.sh | 83 ++++++++ ...nkofa-phoenix-7800-nft-dport-4000-guard.sh | 78 +++++++ ...-sankofa-phoenix-api-db-migrate-up-7800.sh | 91 ++++++++ ...sankofa-phoenix-api-env-lan-parity-7800.sh | 169 +++++++++++++++ ...x-api-hub-graphql-ws-proxy-headers-7800.sh | 141 +++++++++++++ ...hoenix-api-hub-systemd-exec-reload-7800.sh | 92 +++++++++ ...nkofa-phoenix-apollo-bind-loopback-7800.sh | 105 ++++++++++ ...raphql-ws-remove-fastify-websocket-7800.sh | 108 ++++++++++ ...oenix-tls-config-terminate-at-edge-7800.sh | 117 +++++++++++ ...phoenix-websocket-ts-import-logger-7800.sh | 99 +++++++++ ...enix-ws-disable-permessage-deflate-7800.sh | 105 ++++++++++ .../install-sankofa-api-hub-nginx-on-pve.sh | 195 ++++++++++++++++++ ...-phoenix-apollo-port-4000-restrict-7800.sh | 44 ++++ 
.../plan-sankofa-consolidated-hub-cutover.sh | 38 ++++ scripts/env.r630-01.example | 2 + scripts/lib/load-project-env.sh | 5 +- .../update-npmplus-proxy-hosts-api.sh | 9 +- ...eck-sankofa-consolidated-nginx-examples.sh | 54 +++++ scripts/verify/smoke-phoenix-api-hub-lan.sh | 33 +++ .../smoke-phoenix-graphql-ws-subscription.mjs | 68 ++++++ .../smoke-phoenix-graphql-wss-public.sh | 66 ++++++ .../verify-sankofa-consolidated-hub-lan.sh | 80 +++++++ 42 files changed, 2635 insertions(+), 14 deletions(-) create mode 100644 .github/workflows/validate-sankofa-nginx-examples.yml create mode 100644 config/compose/sankofa-consolidated-runtime.example.yml create mode 100644 config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft create mode 100644 config/nginx/sankofa-api-hub-main.example.conf create mode 100644 config/nginx/sankofa-hub-main.example.conf create mode 100644 config/nginx/sankofa-non-chain-frontends.example.conf create mode 100644 config/nginx/sankofa-phoenix-api-hub.example.conf create mode 100644 config/systemd/sankofa-non-chain-web-hub-nginx.service.example create mode 100644 config/systemd/sankofa-phoenix-api-hub-nginx.service.example create mode 100644 docs/02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md create mode 100644 docs/02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md create mode 100644 docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md create mode 100644 docs/03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md create mode 100644 docs/03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md create mode 100755 scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh create mode 100755 
scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-websocket-ts-import-logger-7800.sh create mode 100755 scripts/deployment/ensure-sankofa-phoenix-ws-disable-permessage-deflate-7800.sh create mode 100755 scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh create mode 100755 scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh create mode 100755 scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh create mode 100755 scripts/verify/check-sankofa-consolidated-nginx-examples.sh create mode 100755 scripts/verify/smoke-phoenix-api-hub-lan.sh create mode 100644 scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs create mode 100755 scripts/verify/smoke-phoenix-graphql-wss-public.sh create mode 100755 scripts/verify/verify-sankofa-consolidated-hub-lan.sh diff --git a/.github/workflows/validate-sankofa-nginx-examples.yml b/.github/workflows/validate-sankofa-nginx-examples.yml new file mode 100644 index 00000000..c1ac6218 --- /dev/null +++ b/.github/workflows/validate-sankofa-nginx-examples.yml @@ -0,0 +1,25 @@ +# nginx -t on Sankofa consolidated example configs (no LAN). 
+name: Validate Sankofa nginx examples + +on: + pull_request: + paths: + - 'config/nginx/sankofa-*.example.conf' + - 'config/nginx/sankofa-*-main.example.conf' + - 'scripts/verify/check-sankofa-consolidated-nginx-examples.sh' + - '.github/workflows/validate-sankofa-nginx-examples.yml' + push: + branches: [main, master] + paths: + - 'config/nginx/sankofa-*.example.conf' + - 'config/nginx/sankofa-*-main.example.conf' + - 'scripts/verify/check-sankofa-consolidated-nginx-examples.sh' + - '.github/workflows/validate-sankofa-nginx-examples.yml' + +jobs: + nginx-syntax: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: nginx -t (host or Docker) + run: bash scripts/verify/check-sankofa-consolidated-nginx-examples.sh diff --git a/AGENTS.md b/AGENTS.md index 03a075eb..3c945cb0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -58,6 +58,8 @@ Orchestration for Proxmox VE, Chain 138 (`smom-dbis-138/`), explorers, NPMplus, | Sankofa portal → CT 7801 (build + restart) | `./scripts/deployment/sync-sankofa-portal-7801.sh` (`--dry-run` first); default `NEXTAUTH_URL=https://portal.sankofa.nexus` via `sankofa-portal-ensure-nextauth-on-ct.sh`; IT `/it` env: `sankofa-portal-merge-it-read-api-env-from-repo.sh` (`IT_READ_API_URL` in repo `.env`) | | Portal Keycloak OIDC secret on CT 7801 | After client exists: `./scripts/deployment/sankofa-portal-merge-keycloak-env-from-repo.sh` (needs `KEYCLOAK_CLIENT_SECRET` in repo `.env`; base64-safe over SSH) | | Sankofa corporate web → CT 7806 | Provision: `./scripts/deployment/provision-sankofa-public-web-lxc-7806.sh`. Sync: `./scripts/deployment/sync-sankofa-public-web-to-ct.sh`. systemd: `config/systemd/sankofa-public-web.service`. 
Set `IP_SANKOFA_PUBLIC_WEB` in `.env`, then `scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh` | +| **Non-chain ecosystem** (hyperscaler-style: edge / API hub / IdP / data cells; **not** chain CTs) | `docs/02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md` — gap review: `docs/02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md` | +| Sankofa / Phoenix **consolidated hub** (optional — fewer non-chain LXCs) | `docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md` — `docs/03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md` (offload r630-01: consolidate + **place** hubs on quieter nodes); `docs/03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md` (NPM → `:8080`, `TRUST_PROXY`, rollback); `bash scripts/verify/check-sankofa-consolidated-nginx-examples.sh`; `bash scripts/verify/verify-sankofa-consolidated-hub-lan.sh`; `bash scripts/verify/smoke-phoenix-api-hub-lan.sh` (hub **:8080**); `pnpm run verify:phoenix-graphql-wss` or `PHOENIX_WSS_INCLUDE_LAN=1 bash scripts/verify/smoke-phoenix-graphql-wss-public.sh` (HTTP **101** WS upgrade); `pnpm run verify:phoenix-graphql-ws-subscription` (**connection_ack**); `bash scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh` (fix RSV1 / competing upgrade listener); `bash scripts/deployment/ensure-sankofa-phoenix-websocket-ts-import-logger-7800.sh` (**logger** import in `websocket.ts` — avoids **crash on WS disconnect**); `bash scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh` (optional **nft** reject :4000 from non-loopback); `bash scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh` (hub `/graphql-ws` **Accept-Encoding** / **proxy_buffering**); `bash scripts/deployment/ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800.sh` (hub **ExecReload**); `bash scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh` (**.env** Keycloak + Postgres LAN + 
`NODE_ENV` policy); `bash scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh` (**pnpm db:migrate:up** on **7800**); `bash scripts/deployment/ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh` (production **HTTP** behind NPM); `bash scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh` (`--ssh`); **Apollo loopback:** `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `bash scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh --apply --vmid 7800`; `bash scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh`; **API hub on CT:** `bash scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh --dry-run --vmid 7800` (live: `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `--apply --vmid 7800`); **dbis API `TRUST_PROXY`:** `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=10150` `bash scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh --apply --vmid 10150` (repeat **10151**); NPM fleet: `SANKOFA_NPM_PHOENIX_PORT=8080` + `scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh` (Phoenix rows use **WebSocket: true**); `.env` hub overrides optional | | CCIP relay (r630-01 host) | WETH lane: `config/systemd/ccip-relay.service`. Mainnet cW lane: `config/systemd/ccip-relay-mainnet-cw.service` (health `http://192.168.11.11:9863/healthz`). Public edge: set `CCIP_RELAY_MAINNET_CW_PUBLIC_HOST`, run `scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh`, relay-only `scripts/nginx-proxy-manager/upsert-ccip-relay-mainnet-cw-proxy-host.sh`, or SSH hop `scripts/nginx-proxy-manager/upsert-ccip-relay-mainnet-cw-via-ssh.sh`; DNS `scripts/cloudflare/configure-relay-mainnet-cw-dns.sh`. Use `NPM_URL=https://…:81` for API scripts (HTTP on :81 301s to HTTPS). 
| | XDC Zero + Chain 138 (parallel to CCIP) | `bash scripts/xdc-zero/run-xdc-zero-138-operator-sequence.sh` · `docs/03-deployment/CHAIN138_XDC_ZERO_BRIDGE_RUNBOOK.md` · `CHAIN138_XDC_ZERO_DEPLOYMENT_TROUBLESHOOTING.md` · `config/xdc-zero/` · `scripts/xdc-zero/` · systemd `node dist/server.js` template — **XDC mainnet RPC:** `https://rpc.xinfin.network` (chain id 50; more endpoints: [chainid.network/chain/50](https://chainid.network/chain/50/)); **Chain 138 side:** Core `http://192.168.11.211:8545` is operator-only, relayer/services use `https://rpc-http-pub.d-bis.org` | | OP Stack Standard Rollup (Ethereum mainnet, Superchain) | `docs/03-deployment/OP_STACK_STANDARD_ROLLUP_SUPERCHAIN_RUNBOOK.md` · optional L2↔Besu notes `docs/03-deployment/OP_STACK_L2_AND_BESU138_BRIDGE_NOTES.md` · `config/op-stack-superchain/` · `scripts/op-stack/` (e.g. `fetch-standard-mainnet-toml.sh`, checklist scripts) · `config/systemd/op-stack-*.example.service` — **distinct L2 chain ID from Besu 138**; follow [Optimism superchain-registry](https://github.com/ethereum-optimism/superchain-registry) for listing | diff --git a/config/compose/sankofa-consolidated-runtime.example.yml b/config/compose/sankofa-consolidated-runtime.example.yml new file mode 100644 index 00000000..3f14c4ff --- /dev/null +++ b/config/compose/sankofa-consolidated-runtime.example.yml @@ -0,0 +1,15 @@ +# Example: API hub container only. Point upstream blocks in +# config/nginx/sankofa-phoenix-api-hub.example.conf to real Phoenix (e.g. :4000) and +# dbis_core (:3000) — on the same host use 127.0.0.1 after colocation, or LAN IPs +# (e.g. 192.168.11.50, 192.168.11.155) during migration. 
+# +# docker compose -f sankofa-consolidated-runtime.example.yml up -d + +services: + api-hub: + image: nginx:1.27-alpine + ports: + - "8080:8080" + volumes: + - ../nginx/sankofa-phoenix-api-hub.example.conf:/etc/nginx/conf.d/default.conf:ro + restart: unless-stopped diff --git a/config/ip-addresses.conf b/config/ip-addresses.conf index 340a2090..376686d7 100644 --- a/config/ip-addresses.conf +++ b/config/ip-addresses.conf @@ -273,6 +273,14 @@ SANKOFA_CLIENT_SSO_PORT="${SANKOFA_CLIENT_SSO_PORT:-$SANKOFA_PORTAL_PORT}" # IP_SANKOFA_DASH="192.168.11.xx" # SANKOFA_DASH_PORT="${SANKOFA_DASH_PORT:-3000}" +# Optional consolidated non-chain runtime (nginx web hub + API path hub). Defaults below +# match the discrete CT model until you provision hub LXCs and set overrides in .env. +# See docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md +IP_SANKOFA_WEB_HUB="${IP_SANKOFA_WEB_HUB:-$IP_SANKOFA_PORTAL}" +SANKOFA_WEB_HUB_PORT="${SANKOFA_WEB_HUB_PORT:-$SANKOFA_PORTAL_PORT}" +IP_SANKOFA_PHOENIX_API_HUB="${IP_SANKOFA_PHOENIX_API_HUB:-$IP_SANKOFA_PHOENIX_API}" +SANKOFA_PHOENIX_API_HUB_PORT="${SANKOFA_PHOENIX_API_HUB_PORT:-$SANKOFA_PHOENIX_API_PORT}" + # Gov Portals dev (VMID 7804) — DBIS, ICCC, OMNL, XOM at *.xom-dev.phoenix.sankofa.nexus IP_GOV_PORTALS_DEV="192.168.11.54" diff --git a/config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft b/config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft new file mode 100644 index 00000000..71a5ae04 --- /dev/null +++ b/config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft @@ -0,0 +1,12 @@ +# Optional defense-in-depth on Phoenix API LXC (VMID 7800): reject TCP :4000 from non-loopback +# interfaces. Safe when Apollo binds 127.0.0.1:4000 (no listener on eth0); catches accidental +# 0.0.0.0:4000 bind. 
Load with: nft -f /path/to/this.nft +# +# Remove: nft delete table inet sankofa_phoenix_guard + +table inet sankofa_phoenix_guard { + chain input { + type filter hook input priority -50; policy accept; + meta iifname != "lo" tcp dport 4000 reject with tcp reset + } +} diff --git a/config/nginx/sankofa-api-hub-main.example.conf b/config/nginx/sankofa-api-hub-main.example.conf new file mode 100644 index 00000000..d963c8e0 --- /dev/null +++ b/config/nginx/sankofa-api-hub-main.example.conf @@ -0,0 +1,19 @@ +# Top-level nginx.conf for API hub — copy to /etc/sankofa-phoenix-api-hub/nginx.conf on CT. +# Include path-based site from sankofa-phoenix-api-hub.example.conf under conf.d/ + +user nginx; +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + include /etc/sankofa-phoenix-api-hub/conf.d/*.conf; +} diff --git a/config/nginx/sankofa-hub-main.example.conf b/config/nginx/sankofa-hub-main.example.conf new file mode 100644 index 00000000..040ae2ac --- /dev/null +++ b/config/nginx/sankofa-hub-main.example.conf @@ -0,0 +1,23 @@ +# Top-level nginx.conf for systemd ExecStart -c /etc/sankofa-web-hub/nginx.conf +# On CT: install snippets to /etc/sankofa-web-hub/ and include site conf from repo examples. 
+# +# mkdir -p /etc/sankofa-web-hub/conf.d +# cp repo/config/nginx/sankofa-non-chain-frontends.example.conf /etc/sankofa-web-hub/conf.d/site.conf +# cp repo/config/nginx/sankofa-hub-main.example.conf /etc/sankofa-web-hub/nginx.conf + +user nginx; +worker_processes auto; +error_log /var/log/nginx/error.log warn; +pid /var/run/nginx.pid; + +events { + worker_connections 1024; +} + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + include /etc/sankofa-web-hub/conf.d/*.conf; +} diff --git a/config/nginx/sankofa-non-chain-frontends.example.conf b/config/nginx/sankofa-non-chain-frontends.example.conf new file mode 100644 index 00000000..8b0cae44 --- /dev/null +++ b/config/nginx/sankofa-non-chain-frontends.example.conf @@ -0,0 +1,27 @@ +# Example: single nginx "web hub" for multiple non-chain hostnames (static roots). +# Copy to an LXC as e.g. /etc/nginx/sites-enabled/sankofa-web-hub.conf and adjust paths. +# TLS: typically terminate at NPM; LAN can stay HTTP to this host. + +map $host $sankofa_site_root { + portal.sankofa.nexus /var/www/portal; + admin.sankofa.nexus /var/www/admin; + sankofa.nexus /var/www/corporate; + www.sankofa.nexus /var/www/corporate; + # clients.example.nexus /var/www/clients/example; + default /var/www/default; +} + +server { + listen 80; + server_name portal.sankofa.nexus admin.sankofa.nexus sankofa.nexus www.sankofa.nexus; + + root $sankofa_site_root; + index index.html; + + location / { + try_files $uri $uri/ /index.html; + } + + add_header X-Content-Type-Options nosniff; + add_header Referrer-Policy strict-origin-when-cross-origin; +} diff --git a/config/nginx/sankofa-phoenix-api-hub.example.conf b/config/nginx/sankofa-phoenix-api-hub.example.conf new file mode 100644 index 00000000..94d0cf9f --- /dev/null +++ b/config/nginx/sankofa-phoenix-api-hub.example.conf @@ -0,0 +1,65 @@ +# Tier-1 API hub: one nginx listener; path-based routes to internal Phoenix + dbis_core. 
+# Upstreams = loopback on same LXC after you colocate processes, or LAN IPs during migration. +# +# Adjust upstream addresses/ports to match ALL_VMIDS_ENDPOINTS / your compose layout. + +upstream sankofa_phoenix_graphql { + server 127.0.0.1:4000; + keepalive 32; +} + +upstream dbis_core_rest { + server 127.0.0.1:3000; + keepalive 32; +} + +server { + listen 8080; + server_name _; + + # Optional: restrict to NPM / LAN source IPs + # allow 192.168.11.0/24; + # deny all; + + location = /health { + default_type application/json; + return 200 '{"status":"hub-up"}'; + } + + location /graphql { + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 300s; + proxy_pass http://sankofa_phoenix_graphql; + } + + location /graphql-ws { + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Accept-Encoding ""; + proxy_buffering off; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 3600s; + proxy_pass http://sankofa_phoenix_graphql; + } + + location /api/ { + proxy_http_version 1.1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_pass http://dbis_core_rest; + } + + location /api-docs { + proxy_pass http://dbis_core_rest; + } +} diff --git a/config/systemd/sankofa-non-chain-web-hub-nginx.service.example b/config/systemd/sankofa-non-chain-web-hub-nginx.service.example new file mode 100644 index 00000000..05be7cb1 --- /dev/null +++ b/config/systemd/sankofa-non-chain-web-hub-nginx.service.example @@ -0,0 +1,14 @@ +[Unit] +Description=Sankofa 
non-chain web hub (nginx static roots per Host) +After=network.target + +[Service] +Type=simple +ExecStartPre=/usr/sbin/nginx -t -c /etc/sankofa-web-hub/nginx.conf +ExecStart=/usr/sbin/nginx -g "daemon off;" -c /etc/sankofa-web-hub/nginx.conf +ExecReload=/usr/sbin/nginx -s reload +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/config/systemd/sankofa-phoenix-api-hub-nginx.service.example b/config/systemd/sankofa-phoenix-api-hub-nginx.service.example new file mode 100644 index 00000000..d735f18c --- /dev/null +++ b/config/systemd/sankofa-phoenix-api-hub-nginx.service.example @@ -0,0 +1,16 @@ +[Unit] +Description=Sankofa Phoenix API hub (nginx path router to Phoenix GraphQL + dbis_core REST) +After=network.target +# After=dbis-core.service sankofa-phoenix-api.service +# Uncomment Wants= when subgraphs are on the same host and managed by systemd. + +[Service] +Type=simple +ExecStartPre=/usr/sbin/nginx -t -c /etc/sankofa-phoenix-api-hub/nginx.conf +ExecStart=/usr/sbin/nginx -g "daemon off;" -c /etc/sankofa-phoenix-api-hub/nginx.conf +ExecReload=/usr/sbin/nginx -s reload -c /etc/sankofa-phoenix-api-hub/nginx.conf +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target diff --git a/docs/02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md b/docs/02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md new file mode 100644 index 00000000..fa1d182a --- /dev/null +++ b/docs/02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md @@ -0,0 +1,96 @@ +# Non-chain ecosystem — hyperscaler-style design and deployment + +**Status:** Architecture / target operating model +**Last updated:** 2026-04-13 +**Scope:** Everything **except** blockchain-adjacent guests and services (Besu validators and RPC lanes, Blockscout-style explorers, bridge relayers, Chain 138 deploy paths, token-aggregation **runtime** tied to chain RPC). Those stay on their own **chain plane** with chain-specific runbooks. 
This document is the **application and edge plane** for Sankofa, Phoenix, DBIS core, portals, NPM, identity, and supporting data. + +--- + +## 1. What “ecosystem” means here + +A coherent **platform**: operators and clients interact through a small number of **managed surfaces** (DNS, TLS, APIs, portals), backed by **clear boundaries** (identity, data, observability, change management). Hyperscalers do not run “one random VM per microsite”; they run **regional edge**, **shared app runtimes**, **managed data**, and **global control planes** with strict contracts. + +Your non-chain ecosystem should **feel** like that: fewer hand-crafted snowflakes, more **repeatable cells** (LXC or VM patterns), **declared** upstreams, and **observable** health—not a flat list of unrelated CTs. + +--- + +## 2. Hyperscaler concepts mapped to this program + +| Hyperscaler idea | Plain language | This ecosystem (non-chain) | +|------------------|----------------|----------------------------| +| **Region** | Geography / failure domain | **LAN site** (e.g. VLAN 11 + Proxmox cluster) — one “region” today; multi-site is a later region pair. | +| **Availability zone** | Independent power/network within a region | **Proxmox nodes** (e.g. r630-01 vs r630-04) — place **stateless edge** and **burst** workloads across nodes; keep **tightly coupled** DB + app tiers co-located unless latency and HA analysis say otherwise. | +| **Edge / front door** | TLS termination, routing, WAF | **NPMplus** (and optional Cloudflare in front) — single place for certs, forced HTTPS, and upstream policy. | +| **API gateway / mesh ingress** | One front for many backends | **Phoenix API hub** (nginx Tier 1 today; optional BFF Tier 2) — `/graphql`, `/api`, consistent headers, rate limits, `TRUST_PROXY` alignment for `dbis_core`. 
| +| **Managed Kubernetes / App Service** | Standard runtime for web APIs | **LXC templates**: one pattern for “Node + systemd”, one for “nginx static only”, one for “Postgres only” — same packages, same hardening checklist. | +| **Identity (IdP)** | Central auth | **Keycloak** — realms, clients, MFA policy; portals are **clients**, not bespoke login servers. | +| **Managed database** | Durable state, backups, PITR | **Postgres** for Phoenix / portal data — backups, restore drills, connection limits documented. | +| **Service directory** | What runs where | **`ALL_VMIDS_ENDPOINTS.md`** + `config/ip-addresses.conf` + (when adopted) **hub env** — treat as **service catalog**, not tribal knowledge. | +| **Observability** | Metrics, logs, traces | **Per-cell**: node_exporter or similar where you standardize; **aggregator** (Grafana/Loki stack when you add it) — same pattern as “send logs to the regional pipeline.” | +| **Landing zone / policy** | Guardrails before workloads land | **`PROXMOX_OPS_APPLY`**, `PROXMOX_OPS_ALLOWED_VMIDS`, dry-run scripts, `proxmox-production-guard.sh` — “no mutation without contract,” similar to Azure Policy / SCP ideas at small scale. | +| **IaC / GitOps** | Desired state from repo | **This repo**: scripts + `config/` + runbooks; optional future **declarative** host config (e.g. cloud-init templates per role) so new CTs are **cloned from role**, not artisanal. | + +--- + +## 3. Target cell types (non-chain) + +Design the fleet from a **small menu** of cell types; anything that does not fit forces a design review. + +1. **Edge-static cell** — nginx only; multiple `server_name` or `map $host`; static `root` per product line. Lowest RAM. Good for marketing, entity microsites (exported), status pages, and **SPAs that only talk to APIs** (no server-only NextAuth on that host). 
**IRU / marketplace discovery** often stays **dynamic** (SSR or browser app against `dbis_core`) until a deliberate static-export pipeline exists—do not assume static-first fits all catalog UX. +2. **Edge-SSR cell** — one Node process (or small cluster later) for NextAuth / server components; **one** cell per “SSR family,” not one per brand, where host-based routing suffices. +3. **API hub cell** — nginx (or future BFF) only; upstreams to Phoenix Apollo and `dbis_core` over LAN. **Prefer placement** on a node with headroom (see [SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](../03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md)). +4. **Data cell** — Postgres (and optional read replica pattern later); no arbitrary co-install of app servers. +5. **Identity cell** — Keycloak; isolated upgrades and backup story. +6. **Operator / control** — NPM, IT read API, inventory jobs — same hardening and backup discipline as “regional tooling” accounts in public cloud. + +**Anti-pattern:** one-off CTs that mix “random nginx + cron + manual edits” without a role name in the catalog. + +--- + +## 4. Practices to adopt (hyperscaler-aligned) + +- **Single edge story:** NPM (and DNS) as the **only** public entry contract; internal IPs are implementation details. +- **Hub-and-spoke APIs:** clients talk to **one** Phoenix-facing origin where possible; backends stay private on LAN. **CORS** allowlists must include **every browser origin** that calls the API (portal, admin, studio, marketplace SPAs)—not only hostnames served by the static web hub. +- **Blast radius:** consolidating statics **reduces** attack surface and cert sprawl; moving hubs off overloaded nodes **reduces** correlated failure under load. +- **Versioned change:** runbooks + script `--dry-run` first; VMID allowlists for mutations. +- **Observability contract:** every cell exposes **`/health`** (or documented equivalent) and logs to a **single** retention policy. 
+- **Naming:** FQDN → owner → cell type in docs (already directionally in `FQDN_EXPECTED_CONTENT.md` / E2E lists). + +--- + +## 5. Explicit exclusion (blockchain plane) + +Do **not** fold these into the “hyperscaler-style non-chain cell” menu without a **dedicated** chain runbook merge: + +- Besu validators, sentries, core/public RPC CTs +- Blockscout / explorer stacks +- CCIP / relay / XDC-zero **chain** workers +- Chain 138 deploy RPC paths and token-aggregation **as chain execution** (writes, signers, keeper paths) + +**Boundary nuance:** a **read-only** token-aggregation or quote service that only calls **public** RPC may be operated like an **edge-adjacent** app cell; anything holding **keys**, executing **writes**, or coupling to **validator** timing stays on the **chain plane**. + +They remain a **separate plane** with different SLOs, upgrade windows, and safety rules. The **non-chain** ecosystem **integrates** with them only via **documented APIs and RPC URLs**, not by sharing generic web cells. + +--- + +## 6. Related documents + +- [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](./SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) +- [SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](../03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md) +- [SANKOFA_PHOENIX_CANONICAL_BOUNDARIES_AND_TAXONOMY.md](./SANKOFA_PHOENIX_CANONICAL_BOUNDARIES_AND_TAXONOMY.md) +- [PROXMOX_LOAD_BALANCING_RUNBOOK.md](../04-configuration/PROXMOX_LOAD_BALANCING_RUNBOOK.md) +- [PUBLIC_SECTOR_TENANCY_MARKETPLACE_AND_DEPLOYMENT_BASELINE.md](./PUBLIC_SECTOR_TENANCY_MARKETPLACE_AND_DEPLOYMENT_BASELINE.md) (tenancy and catalog vs marketing) +- [NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](./NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) (inconsistencies, P0/P1 backlog, NPM/WebSocket/`TRUST_PROXY`) + +--- + +## 7. Adoption (incremental) + +You do not need a “big bang.” Order of operations: + +1. Name current CTs against the **cell types** in section 3; mark gaps. +2. 
Stand up **one** edge-static or API-hub cell on a **non–r630-01** node as a template. +3. Migrate **lowest-risk** FQDNs (static marketing) first; then API hub; then SSR if needed. +4. Retire redundant CTs after rollback window; update inventory and `get_host_for_vmid`. + +Fill a short decision log in [SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](../03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md) as you execute. diff --git a/docs/02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md b/docs/02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md new file mode 100644 index 00000000..9d54ae05 --- /dev/null +++ b/docs/02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md @@ -0,0 +1,166 @@ +# Non-chain ecosystem plan — detailed review, gaps, and inconsistencies + +**Purpose:** Critical review of the consolidated Phoenix / web hub / r630-01 offload / hyperscaler-style documents and scripts as of **2026-04-13**. Use this as a **remediation backlog**; update linked docs when items close. + +**Scope reviewed:** +[NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](./NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md), +[SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](./SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md), +[SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](../03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md), +`scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh`, +`scripts/verify/verify-sankofa-consolidated-hub-lan.sh`, +`config/ip-addresses.conf` hub defaults, +`scripts/lib/load-project-env.sh` `get_host_for_vmid`. + +--- + +## 1. 
Cross-document consistency + +| Topic | Hyper-scaler model | Consolidated hub doc | r630-01 goal doc | Verdict | +|-------|---------------------|----------------------|------------------|---------| +| Chain vs non-chain boundary | Explicit exclusion list | Matches | Matches | **Aligned** | +| API hub Tier 1 | Gateway row | Tier 1 nginx | Phase 2 move hub off 7800 | **Aligned**; live state (hub on **7800**) is **interim** per r630 doc | +| Web hub | Edge-static / SSR cells | Options A/B/C | Phase 1 | **Aligned** | +| Load relief | Fewer cells + placement | “Moving hubs” note | Non-goal: nginx CPU on same node | **Aligned** | +| NPM | Single edge story | Fewer upstream IPs possible | NPM repoint | **Partial gap:** NPM often still **one row per FQDN**; “fewer rows” is **upstream IP convergence**, not necessarily fewer proxy host records (see §4.1). | + +--- + +## 2. Technical gaps (must fix in implementation, not only docs) + +### 2.1 `TRUST_PROXY` and client IP for `dbis_core` (high) + +**Issue:** Tier-1 nginx forwards `X-Forwarded-For` / `X-Real-IP`, but `dbis_core` IRU rate limits and abuse logic require **`TRUST_PROXY=1`** (and correct **trusted hop**: NPM → hub → app). If `dbis_core` does not trust the hub IP, it sees **only the hub’s** LAN address for all users. + +**Remediation:** Document in cutover checklist: set `TRUST_PROXY=1` on `dbis_core` **and** restrict trusted proxy list to **NPM** and **API hub** subnets/IPs. Add integration test: rate limit key changes when `X-Forwarded-For` varies. +**Doc fix:** Already mentioned in consolidated §3.3; add explicit **“before NPM → hub cutover”** gate in [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](./SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) operator checklist. +**Repo (2026-04-13):** `dbis_core` supports **`TRUST_PROXY_HOPS`** (1–10) so Express `trust proxy` matches NPM-only vs NPM→hub→app; see `dbis_core/.env.example`. IP allowlisting for proxies remains an ops/network task. 
+ +### 2.2 GraphQL WebSocket through NPM + hub (high) + +**Issue:** `graphql-ws` requires **Upgrade** end-to-end. NPM custom locations must **allow WebSockets**; hub nginx already sets `Upgrade` / `Connection` to Apollo. If NPM strips or times out upgrades, subscriptions break **silently** for some clients. + +**Remediation:** Add explicit E2E: `wscat` or Apollo subscription smoke **through public URL** after any NPM port/path change. Document NPM “Websockets support” toggle if applicable. +**Repo:** `scripts/verify/smoke-phoenix-graphql-wss-public.sh` (curl **HTTP 101** upgrade on `wss://…/graphql-ws`; use `PHOENIX_WSS_INCLUDE_LAN=1` for hub `:8080`). + +### 2.3 CORS and browser origins (medium) + +**Issue:** Consolidated doc says CORS allowlist “web hub FQDNs only.” Browsers calling **`https://phoenix.sankofa.nexus/graphql`** from **`https://portal.sankofa.nexus`** are **cross-origin**; allowlist must include **portal**, **admin**, **studio**, and any SPA origins that call the API—not only the web hub static hostnames. + +**Remediation:** Replace wording with **“all documented browser origins that invoke Phoenix or `dbis_core` from the browser.”** Cross-ref [SANKOFA_MARKETPLACE_SURFACES.md](../03-deployment/SANKOFA_MARKETPLACE_SURFACES.md) for IRU public routes. + +### 2.4 Health check path in operator checklist (low — doc error) + +**Issue:** Cutover checklist suggested `GET /api/v1/health`; `dbis_core` exposes **`/health`** and **`/v1/health`**, not under `/api/v1/`. + +**Remediation:** Checklist corrected in consolidated doc to **`/health` via hub** (`/api/` prefix does not apply to root health). + +### 2.5 Dual public paths (4000 vs 8080) during migration (medium) + +**Issue:** While both ports are open, **clients can bypass** hub policies (CORS, future WAF) by targeting **:4000** directly if firewalled only at NPM. Hyperscaler model prefers **one** ingress. 
+ +**Remediation:** After NPM cutover to **8080**, **firewall** Phoenix **:4000** to **localhost + hub IP only** on CT 7800, or bind Apollo to **127.0.0.1** only (application config change—needs Phoenix runbook). +**Repo (2026-04-13):** `scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh` sets **`HOST=127.0.0.1`** for Fastify on **7800** when hub upstream is **127.0.0.1:4000**. + +### 2.6 Stock `nginx` package disabled on 7800 (medium) + +**Issue:** Installer `systemctl disable nginx` removes the default **Debian `nginx.service`**. If operators expect `nginx` for ad-hoc static files on that CT, they lose it. Today intentional for **dedicated** `sankofa-phoenix-api-hub.service`. + +**Remediation:** Document on CT 7800: **only** `sankofa-phoenix-api-hub` serves nginx; do not re-enable stock unit without conflict check. + +### 2.7 `proxy_pass` URI and trailing slashes (low) + +**Issue:** `location /api/` + `proxy_pass http://dbis_core_rest;` preserves URI prefix—correct for `dbis_core` mounted at `/api/v1`. If any route is mounted at root on upstream, mismatch possible. + +**Remediation:** Keep; add note: new BFF routes must use **distinct prefixes** (`/bff/`) to avoid colliding with Apollo or `dbis_core`. + +--- + +## 3. Inventory and automation gaps + +### 3.1 `get_host_for_vmid` omits explicit Sankofa VMIDs (medium) + +**Issue:** Sankofa stack VMIDs **7800–7806** fell through to **default** `*)` → r630-01. Behavior matched inventory but was **implicit**—easy to break if default changes. + +**Remediation:** Add explicit `7800|7801|7802|7803|7804|7805|7806` case arm to `get_host_for_vmid` with comment “Sankofa Phoenix stack — verify with `pct list` when migrating.” +**Repo (2026-04-13):** Explicit **`7800–7806`** arm on r630-01 in `scripts/lib/load-project-env.sh` (includes gov portals 7804 and studio 7805).
+ +### 3.2 Fleet scripts and hub env vars (medium) + +**Issue:** `IP_SANKOFA_PHOENIX_API_HUB` / `SANKOFA_PHOENIX_API_HUB_PORT` exist in `ip-addresses.conf`, but **`update-npmplus-proxy-hosts-api.sh`** (and friends) may still **hardcode** or use only `IP_SANKOFA_PHOENIX_API` + `4000`. + +**Remediation:** Grep fleet scripts; add optional branch: when `SANKOFA_PHOENIX_API_HUB_PORT=8080` and flag file or env `SANKOFA_NPM_USE_API_HUB=1`, emit upstream **:8080**. Until then, document **manual** NPM row for hub cutover. +**Repo (2026-04-13):** `update-npmplus-proxy-hosts-api.sh` uses **`SANKOFA_NPM_PHOENIX_PORT`** (default `SANKOFA_PHOENIX_API_PORT`) and **`IP_SANKOFA_NPM_PHOENIX_API`** for `phoenix.sankofa.nexus` / `www.phoenix`. See [SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md](../03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md). + +### 3.3 `PROXMOX_HOST` for install script (low) + +**Issue:** `install-sankofa-api-hub-nginx-on-pve.sh` defaults `PROXMOX_HOST` to r630-01. For hub on **r630-04**, operator must export `PROXMOX_HOST`—easy to miss. + +**Remediation:** Script header already mentions; add **one-line echo** of resolved host at start of `--apply` (done partially); extend dry-run to print `get_host_for_vmid` suggestion when `SANKOFA_API_HUB_TARGET_NODE` set (future env). +**Repo (2026-04-13):** Header states **PROXMOX_HOST = PVE node**; dry-run prints **`get_host_for_vmid`** when `load-project-env.sh` is sourced. + +--- + +## 4. Hyperscaler model — internal tensions + +### 4.1 “Single edge” vs NPM reality + +**Tension:** Model says NPM is the **only** public entry contract. Technically true for TLS, but **NPM** often implements **one proxy host per FQDN**. Hyperscalers use **one ALB** with many rules. **Semantic alignment:** treat NPM as **ALB-equivalent**; “single edge” means **single trust and cert pipeline**, not literally one row. 
+ +### 4.2 Static-first IRU / marketplace + +**Tension:** [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](./SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) suggests static export for IRU/marketplace **where compatible**. Today much of partner discovery is **dynamic** (`dbis_core` + Phoenix marketplace). **Over-optimistic** without a “dynamic shell + CDN” alternative. + +**Remediation:** In NON_CHAIN doc §3, clarify **Edge-static** is for **marketing and post-login SPAs that only call APIs**; **IRU public catalog** may remain **Edge-SSR** or **API-driven SPA** until a static export pipeline exists. + +### 4.3 Token-aggregation and “chain plane” boundary + +**Tension:** [NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](./NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md) excludes **token-aggregation runtime tied to chain RPC**. Many deployments colocate **token-aggregation** with **explorer** or **info** nginx—**hybrid**. Risk: teams mis-classify a service and consolidate wrong CT. + +**Remediation:** Add one line: **“Token-aggregation API that only proxies to public RPC may be treated as edge-adjacent; workers that hold keys or execute chain writes stay chain-plane.”** + +### 4.4 Postgres coupling + +**Tension:** r630 doc says stack is **tightly coupled** for latency. Hyperscaler “managed DB” often implies **network separation**. Acceptable as **single-AZ** pattern; document **when** splitting Phoenix API from **7803** Postgres requires **read replicas** or **connection pooler** (PgBouncer) first. + +--- + +## 5. Missing runbook sections (add over time) + +| Missing item | Why it matters | +|--------------|----------------| +| **Backup/restore** before hub install and before `pct migrate` | Hub nginx does not replace backup discipline for Postgres / Keycloak. | +| **Keycloak redirect URIs** when origins move to web hub IP/hostnames | OIDC failures post-cutover. 
| +| **Certificate issuance** when many FQDNs share one upstream IP | NPM still requests certs per host; rate limits / ACME. | +| **Rollback:** restore NPM upstream + `systemctl start nginx` on 7800? | Dual-stack rollback path. | +| **SLO / error budget** | Hyperscaler practice; currently implicit. | +| **CI for `nginx -t`** on example configs | GitHub Actions: `.github/workflows/validate-sankofa-nginx-examples.yml` (Gitea: mirror or add equivalent workflow). | + +--- + +## 6. Document maintenance items (quick fixes) + +1. **Consolidated doc §5** — ensure artifact table always lists **`install-sankofa-api-hub-nginx-on-pve.sh`** and **`verify-sankofa-consolidated-hub-lan.sh`** next to other operator scripts. +2. **Consolidated §3.2 Tier 1** — prefer **LAN upstream to `dbis_core`** as the default narrative (colocated `127.0.0.1:3000` is the special case). **Clarified** in repo. +3. **Decision log** — “Web hub pattern” vs filled API tier: use **TBD / interim** until a web hub is chosen. **Updated** in repo. +4. **This file** linked from [NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](./NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md) §6 and [MASTER_INDEX.md](../MASTER_INDEX.md). + +--- + +## 7. Prioritized remediation backlog + +| Priority | Item | Owner | +|----------|------|--------| +| P0 | Verify `TRUST_PROXY` + **`TRUST_PROXY_HOPS`** + production trust boundaries for `dbis_core` when using hub | **LAN:** `TRUST_PROXY=1` on **10150/10151** via `ensure-dbis-api-trust-proxy-on-ct.sh`; validate rate-limit keys from two public IPs | +| P0 | WebSocket E2E through NPM after hub port change | **Done:** `smoke-phoenix-graphql-wss-public.sh` → **HTTP 101**; `pnpm run verify:phoenix-graphql-ws-subscription` → **connection_ack** (remove unused `@fastify/websocket` on 7800 if RSV1; see runbook). 
| +| P1 | CORS / allowed origins list includes all browser callers | App + API | +| P1 | Firewall or bind Apollo to localhost after NPM → 8080 | **Done:** `ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh` on **7800** (or use firewall plan if HOST cannot be set) | +| P2 | Explicit `get_host_for_vmid` entries for 7800–7806 | **Done** in `load-project-env.sh` — re-verify on migrate | +| P2 | NPM fleet **`SANKOFA_NPM_PHOENIX_PORT`** / **`IP_SANKOFA_NPM_PHOENIX_API`** | **Done** in `update-npmplus-proxy-hosts-api.sh` | +| P3 | Backup/rollback runbook sections | [SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md](../03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md) §0 / §5 | +| P3 | Clarify static-first vs dynamic IRU in NON_CHAIN §3 | Docs | + +--- + +## 8. Conclusion + +The plan is **directionally sound**: chain plane separation, cell typing, phased offload from r630-01, and Tier-1 API hub are **consistent**. The largest **gaps** are **operational truth** items (client IP trust, WebSockets, CORS wording, dual-port exposure) and **automation drift** (NPM scripts vs new env vars, implicit VMID→host). Closing **P0–P1** before wide NPM cutover matches how hyperscalers treat **ingress migrations**: prove identity and transport contracts first, then shift traffic. diff --git a/docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md b/docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md new file mode 100644 index 00000000..45c28802 --- /dev/null +++ b/docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md @@ -0,0 +1,158 @@ +# Sankofa Phoenix — consolidated non-chain frontend and API hub + +**Status:** Architecture proposal (resource conservation) +**Last updated:** 2026-04-13 +**LAN status (operator):** Tier-1 API hub **nginx on VMID 7800** listening **`http://192.168.11.50:8080`** (`sankofa-phoenix-api-hub.service`). 
Apollo (Fastify) binds **`127.0.0.1:4000`** only (`HOST=127.0.0.1` in `/opt/sankofa-api/.env`; apply: `scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh`). NPM → **:8080** + WebSocket upgrades is live for `phoenix.sankofa.nexus` (fleet 2026-04-13). Install hub: `scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh` with `PROXMOX_OPS_APPLY=1` + `PROXMOX_OPS_ALLOWED_VMIDS=7800`. Readiness: `scripts/verify/verify-sankofa-consolidated-hub-lan.sh`, hub GraphQL `scripts/verify/smoke-phoenix-api-hub-lan.sh`, WebSocket upgrade `scripts/verify/smoke-phoenix-graphql-wss-public.sh` (`pnpm run verify:phoenix-graphql-wss`), graphql-ws handshake `pnpm run verify:phoenix-graphql-ws-subscription`, hub `/graphql-ws` headers `scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh`. + +**r630-01 load goal:** consolidating frontends and **moving hub LXCs** to quieter nodes is what reduces guest count and hypervisor pressure — see [SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](../03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md). + +**Ecosystem shape (non-chain, hyperscaler-style):** [NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](./NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md) (cell types, edge vs chain plane). + +**Scope:** Non-blockchain Sankofa / Phoenix surfaces only. **Out of scope:** Chain 138 explorer, Besu/RPC, CCIP/relayers, token-aggregation compute — keep those on dedicated LXCs/VMs per existing runbooks. + +--- + +## 1. Problem + +Today, multiple LXCs/VMIDs often run **one primary workload each** (portal, corporate web, Phoenix API, DBIS API, gov dev shells, etc.). Each Node or Next process carries **base RAM** (V8 heap, file watchers in dev, separate copies of dependencies). Nginx-only static sites are cheap; **many separate Node servers are not**. + +This document defines a **consolidated runtime** that: + +1. 
Puts **all non-chain web frontends** behind **one LAN endpoint** (one LXC or one Docker host — your choice), using **static-first** or **one Node process** where SSR is required. +2. Puts **all Phoenix-facing backend traffic** behind **one logical API** (one public origin and port): GraphQL (current Phoenix), REST/BFF (`dbis_core` and future middleware), health, and webhooks. + +Canonical surface taxonomy remains [SANKOFA_PHOENIX_CANONICAL_BOUNDARIES_AND_TAXONOMY.md](./SANKOFA_PHOENIX_CANONICAL_BOUNDARIES_AND_TAXONOMY.md). Consolidation changes **packaging**, not the names of visitor vs client vs operator paths. + +--- + +## 2. Single “web hub” LXC (frontends) + +### 2.1 Option A — Static-first (lowest RAM) + +**When:** Marketing pages, IRU/marketplace **after** static export, simple entity microsites, post-login SPAs that call the API hub only. + +- Build: `next build` with `output: 'export'` **where compatible** (no server-only APIs on those routes). +- Serve: **nginx** with one `server` per FQDN (`server_name`) or one server + `map $host $site_root` → different `root` directories under `/var/www/...`. +- **NPM:** All affected FQDNs point to the **same** upstream `http://<IP_SANKOFA_WEB_HUB>:80`. + +**Tradeoff:** NextAuth / OIDC callback flows and server components need either **client-only OIDC** (PKCE) against Keycloak or a **small** SSR slice (see option B). + +### 2.2 Option B — One Node process for all SSR Next apps (moderate RAM) + +**When:** Portal (`portal.sankofa.nexus`), admin, or any app that must keep `getServerSideProps`, NextAuth, or middleware. + +- **Monorepo** (e.g. Turborepo/Nx): multiple Next “apps” merged into **one deployable** using: + - **Next multi-zone** (primary + mounted sub-apps), or + - **Single Next 15 app** with `middleware.ts` rewriting by `Host`, or + - **Single custom server** (less ideal) proxying to child apps — avoid unless necessary. + +**Outcome:** One `node` process (or one `standalone` output + one PID supervisor) on **one port** (e.g. 3000).
Nginx in front optional (TLS termination usually at NPM). + +### 2.3 Option C — Hybrid (practical migration) + +- **nginx:** static corporate apex, static entity sites, docs mirrors. +- **One Node:** portal + Phoenix “shell” that must stay dynamic. + +Still **fewer** LXCs than “one LXC per microsite.” + +### 2.4 What stays out of this box + +- Blockscout / explorer stacks +- `info.defi-oracle.io`, MEV GUI, relay health — separate nginx LXCs as today unless you explicitly merge **static** mirrors only +- Keycloak — **keep separate** (identity is its own security domain) + +--- + +## 3. Single consolidated API (Phoenix hub) + +### 3.1 Responsibilities + +| Path family | Today (typical) | Hub role | +|-------------|-----------------|----------| +| `/graphql`, `/graphql-ws` | Phoenix VMID 7800 :4000 | **Reverse proxy** to existing Apollo until merged in code | +| `/api/v1/*`, `/api-docs` | `dbis_core` (e.g. :3000) | **Reverse proxy** mount | +| `/health` | Multiple | **Aggregate** (optional): hub returns 200 only if subgraphs pass | +| Future BFF | N/A | **Implement in hub** (session, composition, rate limits) | + +**Naming:** Introduce an internal service name e.g. `sankofa-phoenix-hub-api`. Public FQDN can remain `phoenix.sankofa.nexus` or split to `api.phoenix.sankofa.nexus` for clarity; NPM decides. + +### 3.2 Implementation tiers (phased) + +**Tier 1 — Thin hub (fastest, lowest risk)** +One process: **nginx** or **Caddy**. **Typical production pattern:** hub on its own LXC or same CT as Apollo — `proxy_pass` Phoenix to **`127.0.0.1:4000`** when colocated, and `dbis_core` to **`IP_DBIS_API:3000`** (LAN) as in `install-sankofa-api-hub-nginx-on-pve.sh`. **Single public port** (e.g. 443 behind NPM → **8080** on the hub). Before NPM sends public traffic to the hub, validate **`TRUST_PROXY`** and trusted proxy hops for `dbis_core` (see [NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](./NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) §2.1). 
+ +**Tier 2 — Application hub** +Single **Node** (Fastify/Express) app: validates JWT once, applies rate limits, `proxy` to subgraphs, adds **BFF** routes (`/bff/portal/...`). + +**Tier 3 — Monolith (long-term)** +Merge routers and schema into one codebase — only after boundaries and ownership are clear. + +### 3.3 Middleware cross-cutting + +Centralize in the hub: + +- **CORS** allowlist (origins = web hub FQDNs only) +- **Rate limiting** (especially IRU public POST — align with `dbis_core` **`TRUST_PROXY=1`** and a **trusted proxy list** that includes NPM and this hub’s LAN IP, or rate limits see only the hub) +- **Request ID** propagation +- **mTLS** or IP allowlist for operator-only routes (optional) + +--- + +## 4. NPM and inventory + +After cutover: + +- **Fewer distinct upstream IPs** in NPM (many FQDNs can point at the **same** `IP:port`); NPM may still use **one proxy host record per FQDN** for TLS—equivalent to one ALB with many listener rules, not literally one row total. Host-based routing then lives in **web hub** nginx (`server_name` / `map`) or in **Next** `middleware.ts`. +- Update [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md) and `get_host_for_vmid` in `scripts/lib/load-project-env.sh` when VMIDs are **retired** or **replaced** by hub VMIDs. +- **`config/ip-addresses.conf`** defines optional hub variables that **default to the current discrete CT IPs** (`IP_SANKOFA_WEB_HUB` → portal IP, `IP_SANKOFA_PHOENIX_API_HUB` → Phoenix API IP). Override in `.env` when hub LXCs exist. + +--- + +## 5. 
Concrete file references in this repo + +| Artifact | Purpose | +|----------|---------| +| [config/nginx/sankofa-non-chain-frontends.example.conf](../../config/nginx/sankofa-non-chain-frontends.example.conf) | Example **host → static root** nginx for web hub | +| [config/nginx/sankofa-phoenix-api-hub.example.conf](../../config/nginx/sankofa-phoenix-api-hub.example.conf) | Example **path → upstream** for API hub (Tier 1); tune `upstream` to LAN or `127.0.0.1` when colocated | +| [config/nginx/sankofa-hub-main.example.conf](../../config/nginx/sankofa-hub-main.example.conf) | Top-level `nginx.conf` for web hub CT (`-c` for systemd) | +| [config/nginx/sankofa-api-hub-main.example.conf](../../config/nginx/sankofa-api-hub-main.example.conf) | Top-level `nginx.conf` for API hub CT | +| [config/systemd/sankofa-non-chain-web-hub-nginx.service.example](../../config/systemd/sankofa-non-chain-web-hub-nginx.service.example) | systemd unit for web hub nginx | +| [config/systemd/sankofa-phoenix-api-hub-nginx.service.example](../../config/systemd/sankofa-phoenix-api-hub-nginx.service.example) | systemd unit for API hub nginx | +| [config/compose/sankofa-consolidated-runtime.example.yml](../../config/compose/sankofa-consolidated-runtime.example.yml) | Optional Docker Compose sketch (API hub container only) | +| [scripts/verify/check-sankofa-consolidated-nginx-examples.sh](../../scripts/verify/check-sankofa-consolidated-nginx-examples.sh) | **`nginx -t`** on example snippets (host `nginx` or **Docker** fallback) | +| [scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh](../../scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh) | Read-only cutover reminder + resolved env from `load-project-env.sh` | +| [scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh](../../scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh) | Tier-1 hub install on CT (`--dry-run` / `--apply` + `PROXMOX_OPS_*`) | +| 
[scripts/verify/verify-sankofa-consolidated-hub-lan.sh](../../scripts/verify/verify-sankofa-consolidated-hub-lan.sh) | Read-only LAN smoke (Phoenix, portal, dbis `/health`, Keycloak realm) | + +--- + +## 6. Operator cutover checklist (complete in order) + +1. Run `bash scripts/verify/check-sankofa-consolidated-nginx-examples.sh` (CI or laptop). +2. Provision **one** non-chain web hub LXC and/or **one** API hub LXC (or colocate nginx on an existing CT — document the choice). +3. Copy and edit nginx snippets from `config/nginx/` into `/etc/sankofa-web-hub/` and `/etc/sankofa-phoenix-api-hub/` per systemd examples; install **systemd** units from `config/systemd/*.example` (drop `.example`, adjust paths). +4. Set **`.env`** overrides: `IP_SANKOFA_WEB_HUB`, `SANKOFA_WEB_HUB_PORT`, `IP_SANKOFA_PHOENIX_API_HUB`, `SANKOFA_PHOENIX_API_HUB_PORT` (see `plan-sankofa-consolidated-hub-cutover.sh` output after `source scripts/lib/load-project-env.sh`). +5. **Dry-run** NPM upstream changes; then apply during a maintenance window. Confirm **WebSocket** (GraphQL subscriptions) through NPM if clients use `graphql-ws`. +6. Smoke: `curl -fsS http://<IP_SANKOFA_PHOENIX_API_HUB>:<SANKOFA_PHOENIX_API_HUB_PORT>/health`, GraphQL POST to `/graphql`, **`dbis_core`** health via hub as **`GET /api-docs`** or **`GET /health`** on upstream `:3000` through `/api/` only if mounted there — simplest: `curl` **`http://<IP_SANKOFA_PHOENIX_API_HUB>:<SANKOFA_PHOENIX_API_HUB_PORT>/api-docs`** (proxied) per [NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](./NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) §2.4. +7. Update inventory docs and VMID table; decommission retired CTs only after rollback window. Optionally **bind Apollo to 127.0.0.1:4000** or firewall **:4000** from LAN once NPM uses hub only ([NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](./NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) §2.5). + +--- + +## 7.
Related docs + +- [SANKOFA_PHOENIX_CANONICAL_BOUNDARIES_AND_TAXONOMY.md](./SANKOFA_PHOENIX_CANONICAL_BOUNDARIES_AND_TAXONOMY.md) +- [SANKOFA_MARKETPLACE_SURFACES.md](../03-deployment/SANKOFA_MARKETPLACE_SURFACES.md) +- [ENTITY_INSTITUTIONS_WEB_PORTAL_COMPLETION.md](../03-deployment/ENTITY_INSTITUTIONS_WEB_PORTAL_COMPLETION.md) +- [SERVICE_DESCRIPTIONS.md](./SERVICE_DESCRIPTIONS.md) +- [NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](./NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) (gaps, inconsistencies, P0/P1 backlog) + +--- + +## 8. Decision log (fill when adopted) + +| Decision | Choice | Date | +|----------|--------|------| +| Web hub pattern | **TBD** (interim: discrete CTs; target: A / B / C) | | +| API hub Tier | **1** (nginx on VMID 7800, LAN 2026-04-13) | 2026-04-13 | +| Public API hostname | phoenix.sankofa.nexus (NPM → **8080** hub; Apollo **127.0.0.1:4000**) | 2026-04-13 | +| Retired VMIDs | none | | diff --git a/docs/02-architecture/SERVICE_DESCRIPTIONS.md b/docs/02-architecture/SERVICE_DESCRIPTIONS.md index dab930de..335ec096 100644 --- a/docs/02-architecture/SERVICE_DESCRIPTIONS.md +++ b/docs/02-architecture/SERVICE_DESCRIPTIONS.md @@ -1,10 +1,18 @@ # Sankofa Services - Service Descriptions -**Last Updated:** 2026-03-25 +**Last Updated:** 2026-04-13 **Status:** Active Documentation --- +## Consolidated runtime (optional) + +To reduce LXC count for **non-chain** web and to expose **one** Phoenix-facing API origin (GraphQL + `dbis_core` REST behind path routes), see [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](./SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md). `config/ip-addresses.conf` adds `IP_SANKOFA_WEB_HUB` and `IP_SANKOFA_PHOENIX_API_HUB` (defaulting to today’s portal and Phoenix API IPs until you set hub LXCs in `.env`). Blockchain-adjacent stacks (explorer, RPC, relayers) stay **out** of this consolidation. 
+ +For **how** the non-chain fleet should be designed (edge cells, API hub, IdP, data) in hyperscaler-style terms—**excluding** the blockchain plane—see [NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](./NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md). + +--- + ## Brand and Product Relationship ### Company and Product Analogy @@ -41,8 +49,8 @@ This document describes the purpose and function of each service in the Sankofa - **Purpose:** Cloud infrastructure management portal (API service) - **VMID:** 7800 - **IP:** 192.168.11.50 -- **Port:** 4000 -- **External Access:** https://phoenix.sankofa.nexus, https://www.phoenix.sankofa.nexus +- **Port:** **4000** (Apollo direct) and **`8080`** (optional Tier-1 **API hub** nginx: `/graphql` → 4000, `/api` → `dbis_core` on `IP_DBIS_API:3000`) +- **External Access:** https://phoenix.sankofa.nexus, https://www.phoenix.sankofa.nexus (NPM upstream may stay **4000** until you cut over to **8080**) **Details:** - GraphQL API service for Phoenix cloud platform diff --git a/docs/03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md b/docs/03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md new file mode 100644 index 00000000..30234fab --- /dev/null +++ b/docs/03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md @@ -0,0 +1,92 @@ +# Sankofa Phoenix API hub — NPM cutover and post-cutover + +**Purpose:** Ordered steps when moving public `phoenix.sankofa.nexus` traffic from direct Apollo (`:4000`) to Tier-1 nginx on the Phoenix stack (`:8080` by default). Complements [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](../02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) and [SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](./SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md). + +**Not covered here:** corporate apex, portal SSO, or Keycloak realm edits (see portal/Keycloak runbooks). + +--- + +## 0. 
Preconditions + +- API hub installed and healthy on LAN: `curl -sS "http://${IP_SANKOFA_PHOENIX_API:-192.168.11.50}:8080/health"` and a GraphQL POST to `/graphql` succeed. +- Backup: NPM export or UI backup, plus application-level backup if you change Phoenix/dbis systemd or env on the CT. + +--- + +## 1. `dbis_core` (rate limits and `req.ip`) + +1. Set `TRUST_PROXY=1` on the `dbis_core` process (see `scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh` for VMIDs **10150** / **10151**). +2. **`TRUST_PROXY_HOPS`** (optional; default `1` in code): Express counts **reverse proxies that terminate the TCP connection to Node** — typically **one** (either NPM **or** the API hub), even when browsers traversed Cloudflare → NPM → hub → `dbis_core`. Raise hops only if your stack adds **another** reverse proxy **in series** directly in front of the same listener (uncommon). When unsure, leave unset and validate `req.ip` / rate-limit keys with two real client IPs. +3. Ensure **`ALLOWED_ORIGINS`** lists every browser origin that calls the API (portal, admin, studio, marketing SPAs as applicable). Production forbids `*`. +4. Restart `dbis_core` and confirm logs show no CORS or startup validation errors. + +--- + +## 2. NPM fleet (`update-npmplus-proxy-hosts-api.sh`) + +1. In repo `.env` (operator workstation), set: + - `SANKOFA_NPM_PHOENIX_PORT=8080` + - Optionally `IP_SANKOFA_NPM_PHOENIX_API=…` if the hub listens on a different LAN IP than `IP_SANKOFA_PHOENIX_API`. +2. Run the fleet script with valid `NPM_*` credentials (same as other NPM updates). +3. In NPM UI, confirm `phoenix.sankofa.nexus` and `www.phoenix.sankofa.nexus` forward WebSockets (subscriptions use `/graphql-ws`). + +--- + +## 3. 
Verification + +| Check | Command or action | +|--------|-------------------| +| Public HTTPS health | `curl -fsS "https://phoenix.sankofa.nexus/health"` (or hub-exposed health path you standardized) | +| GraphQL | POST `https://phoenix.sankofa.nexus/graphql` with a trivial query | +| WebSocket upgrade (TLS + hub) | `bash scripts/verify/smoke-phoenix-graphql-wss-public.sh` (expects **HTTP 101** via `curl --http1.1`; optional `PHOENIX_WSS_INCLUDE_LAN=1` for hub `:8080`; optional `PHOENIX_WSS_CURL_MAXTIME` default **8**s per probe because curl waits on open WS). Full handshake: `pnpm run verify:phoenix-graphql-ws-subscription` (`connection_init` → `connection_ack`). If Node clients report **RSV1** on `/graphql-ws`, CT **7800** should not register `@fastify/websocket` alongside standalone `ws` (apply `scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh`). **Process crashes on WS disconnect:** `websocket.ts` must import `logger` — `scripts/deployment/ensure-sankofa-phoenix-websocket-ts-import-logger-7800.sh`. Hub nginx: `scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh` (`Accept-Encoding ""`, `proxy_buffering off` in `/graphql-ws`). Optional host guard: `scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh` + `config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft`. | +| IRU / public limits | Hit a rate-limited route from two different public IPs and confirm keys differ (validates forwarded client IP) | + +--- + +## 4. Post-cutover hardening (dual path) + +After NPM points at `:8080` and traffic is stable: + +- **Bind Apollo to loopback** (recommended when hub upstream is `127.0.0.1:4000`): + `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `bash scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh --apply --vmid 7800` + Confirm VLAN cannot connect to `:4000`; hub `:8080` and public `https://phoenix.sankofa.nexus` still work. 
**Alternative:** host firewall on CT 7800 — see `scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh --ssh`. +- **Hub `/graphql-ws` proxy headers** (idempotent; safe with existing installs): + `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `bash scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh --apply --vmid 7800` +- **Hub nginx `ExecReload`** (systemd, idempotent): + `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `bash scripts/deployment/ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800.sh --apply --vmid 7800` +- **Phoenix API DB migrations** (after DB auth works): + `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `bash scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh --apply --vmid 7800` +- **Phoenix API `.env` LAN parity** (Keycloak + Sankofa Postgres host, dedupe passwords, `NODE_ENV` policy, `TERMINATE_TLS_AT_EDGE`): + `source scripts/lib/load-project-env.sh` then + `PROXMOX_OPS_APPLY=1` `PROXMOX_OPS_ALLOWED_VMIDS=7800` `bash scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh --apply --vmid 7800` + Default appends **`NODE_ENV=development`** until `DB_PASSWORD` / `KEYCLOAK_CLIENT_SECRET` meet production length; use **`PHOENIX_API_NODE_ENV=production`** only after secrets and TLS policy are ready. + If Postgres returns **28P01** (auth failed), align **`DB_USER`** (typically **`sankofa`**, not `postgres`) and **`DB_PASSWORD`** with the **`sankofa`** role on VMID **7803** (`ALTER USER … PASSWORD` on the Postgres CT), then run **`ensure-sankofa-phoenix-api-db-migrate-up-7800.sh`** so **`audit_logs`** exists — see [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md). + For **`PHOENIX_API_NODE_ENV=production`** without local certs: run **`ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh`** first and keep **`TERMINATE_TLS_AT_EDGE=1`** in `.env`. 
+- Inventory: [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md) (Phoenix row + VMID 7800 table). + +--- + +## 5. Rollback + +1. Unset `SANKOFA_NPM_PHOENIX_PORT` or set it back to `4000` (or your direct Apollo port). +2. Re-run the NPM fleet script. +3. If `dbis_core` had `TRUST_PROXY_HOPS=2` only for the hub path, reduce hops or disable trust proxy per your direct topology. + +--- + +## 6. References + +- Installer: `scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh` +- Hub graphql-ws headers (live CT): `scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh` +- Phoenix `websocket.ts` logger import (prevents crash on disconnect): `scripts/deployment/ensure-sankofa-phoenix-websocket-ts-import-logger-7800.sh` +- Phoenix API `.env` LAN parity: `scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh` +- Phoenix API DB migrate up (CT 7800): `scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh` +- Phoenix TLS (terminate at edge, production without local certs): `scripts/deployment/ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh` +- Hub unit `ExecReload`: `scripts/deployment/ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800.sh` +- LAN smoke: `scripts/verify/verify-sankofa-consolidated-hub-lan.sh` +- Hub GraphQL smoke: `scripts/verify/smoke-phoenix-api-hub-lan.sh` +- Public / LAN WebSocket upgrade smoke: `scripts/verify/smoke-phoenix-graphql-wss-public.sh` +- Loopback bind for Apollo: `scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh` +- Read-only plan (firewall alternative): `scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh` (`--ssh` on LAN) +- Example config syntax: `scripts/verify/check-sankofa-consolidated-nginx-examples.sh` +- Gap review: `docs/02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md` diff --git a/docs/03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md 
b/docs/03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md new file mode 100644 index 00000000..207d89d0 --- /dev/null +++ b/docs/03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md @@ -0,0 +1,96 @@ +# Goal: relieve r630-01 via consolidation + hub placement (not nginx alone) + +**Status:** Operator goal / runbook +**Last updated:** 2026-04-13 + +## 1. What you are optimizing for + +**Primary goal:** reduce **guest count** and **steady-state CPU / pressure** on **r630-01** (`192.168.11.11`) by: + +1. **Retiring CTs** that only existed to serve **small, non-chain web** surfaces (static or low-SSR), after those surfaces are merged into a **single web hub** guest (or static export + nginx). +2. **Placing new hub LXCs** (nginx-only or low-RAM) on **less busy nodes** (typically **r630-03 / r630-04** per health reports), instead of stacking more edge services on r630-01. +3. **Optionally migrating** existing Sankofa / Phoenix / DBIS-related CTs **off** r630-01 when they are **not** chain-critical for that node. + +**Non-goal:** expecting the **API hub nginx** colocated on VMID **7800** to materially lower r630-01 load. That pattern is for **routing simplicity** and a path to **fewer public upstreams**; load relief comes from **fewer guests** and **better placement**, not from reverse proxy CPU. + +--- + +## 2. 
Current anchor facts (from inventory docs) + +Treat `pct list` on each node as authoritative when planning; the table below is a **documentation snapshot** of common r630-01-adjacent workloads: + +| Area | Typical on r630-01 today | Notes | +|------|---------------------------|--------| +| Sankofa Phoenix stack | **7800** API, **7801** portal, **7802** Keycloak, **7803** Postgres, **7806** public web | Tightly coupled for latency; migrations need cutover windows | +| DBIS API | **10150** (`IP_DBIS_API`) | Often co-dependent with Phoenix / portal flows | +| NPMplus | **10233** / **10234** (see `ALL_VMIDS_ENDPOINTS.md`) | Edge; may stay on r630-01 or follow your NPM HA policy | +| Chain-critical | **2101**, **2103** (Besu core lanes) | **Do not** “consolidate away” without chain runbooks | + +--- + +## 3. Phased execution (explicit consolidation + placement) + +### Phase 0 — Measure (read-only) + +1. Latest cluster health JSON: `bash scripts/verify/poll-lxc-cluster-health.sh` (writes `reports/status/lxc_cluster_health_*.json`). +2. Rebalance **plan only**: + `bash scripts/verify/plan-lxc-rebalance-from-health-report.sh --source r630-01 --target r630-04 --limit 12` + Adjust `--target` to the node with **headroom** (load, PSI, storage). Review exclusions (chain-critical / infra patterns) in the script output. +3. Record **which VMIDs must stay** on r630-01 vs **candidates to move** in your change ticket. + +### Phase 1 — Consolidate **non-chain web** (fewer guests) + +1. Architecture: [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](../02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) (static-first vs one Node process). +2. Build static exports (or one monorepo SSR host) so **multiple FQDNs** can share **one nginx** `server_name` / `map $host` pattern (`config/nginx/sankofa-non-chain-frontends.example.conf`). +3. **Provision the web hub LXC on the target node** (not r630-01 if the goal is offload). 
Use a **new IP** from your IPAM; update `.env` overrides `IP_SANKOFA_WEB_HUB` / port when ready. +4. NPM dry-run → apply: point marketing / microsite hosts at the web hub upstream. + +**Outcome:** retire legacy one-site-one-CT guests **after** TTL / rollback window. + +### Phase 2 — API hub **placement** (avoid piling onto r630-01) + +**Today:** Tier-1 API hub nginx may be colocated on **7800** (same CT as Apollo) for a fast LAN proof — that does **not** reduce r630-01 guest count. + +**Target pattern for load relief:** + +1. Create a **small** Debian LXC on **r630-03 or r630-04** (dedicated “phoenix-api-hub” VMID), **only** nginx + `sankofa-phoenix-api-hub.service`. +2. Upstreams in that hub: `proxy_pass` to **LAN IPs** of **7800:4000** (GraphQL) and **10150:3000** (`dbis_core`) — cross-node proxy is fine on VLAN 11. +3. Run `install-sankofa-api-hub-nginx-on-pve.sh` with `--vmid <hub-vmid>` on the **target** node’s PVE host (set `PROXMOX_HOST` if not r630-01). +4. NPM: point `phoenix.sankofa.nexus` to **hub IP:8080** (or keep **4000** direct until validated). Before declaring success, run **WebSocket** smoke (`graphql-ws` through NPM) and confirm **`dbis_core` `TRUST_PROXY`** + trusted proxy list include the hub (see [NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](../02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) §2.1–2.2). +5. **Disable / remove** hub nginx from **7800** if you no longer want dual stacks (maintenance window; validate `systemctl stop sankofa-phoenix-api-hub` on 7800 only after NPM uses the new hub). + +**Outcome:** Phoenix CT can stay on r630-01 for DB locality, while **edge proxy RAM/CPU** sits on a lighter node — or later migrate 7800 itself after Phase 3. + +### Phase 3 — Migrate heavy CTs (optional, highest impact) + +Use **scoped** `pct migrate` from the planner output. Rules from project safety: + +- Named VMID list, **dry-run** first, maintenance window, rollback IP/NPM plan.
+- After any move: update `get_host_for_vmid` in `scripts/lib/load-project-env.sh` and [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md). + +### Phase 4 — Retire + verify + +1. Destroy **only** CTs that are fully replaced (config backups, DNS, NPM rows removed). +2. Re-run health poll + E2E verifier profile for public hosts you moved. + +--- + +## 4. Decision record (fill as you execute) + +| Decision | Choice | Date | +|----------|--------|------| +| Web hub target node | r630-0? | | +| API hub target node (nginx-only LXC) | r630-0? | | +| NPM phoenix upstream | :4000 direct / :8080 hub | | +| VMIDs retired after consolidation | | | + +--- + +## 5. Related references + +- [NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](../02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md) (cell types, edge plane vs chain plane) +- [SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](../02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) +- [PROXMOX_LOAD_BALANCING_RUNBOOK.md](../04-configuration/PROXMOX_LOAD_BALANCING_RUNBOOK.md) +- [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md) +- `scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh` +- `scripts/verify/verify-sankofa-consolidated-hub-lan.sh` diff --git a/docs/04-configuration/ALL_VMIDS_ENDPOINTS.md b/docs/04-configuration/ALL_VMIDS_ENDPOINTS.md index c06b9850..9fd6d4d8 100644 --- a/docs/04-configuration/ALL_VMIDS_ENDPOINTS.md +++ b/docs/04-configuration/ALL_VMIDS_ENDPOINTS.md @@ -334,7 +334,7 @@ The following VMIDs were the older `25xx` RPC identities before the `21xx/22xx/2 | VMID | IP Address | Hostname | Status | Endpoints | Purpose | |------|------------|----------|--------|-----------|---------| -| 7800 | 192.168.11.50 | sankofa-api-1 | ✅ Running | GraphQL: 4000, Health: /health | Phoenix API (Cloud Platform Portal) | +| 7800 | 192.168.11.50 | sankofa-api-1 | ✅ Running | **Apollo :4000** loopback-only (`HOST=127.0.0.1`); **Tier-1 hub :8080** 
(`/graphql`→127.0.0.1:4000); hub `/health` | Phoenix API (Cloud Platform Portal) | | 7801 | 192.168.11.51 | sankofa-portal-1 | ✅ Running | Web: 3000 | Hybrid cloud **client portal** (`portal.sankofa.nexus` / `admin.sankofa.nexus` when NPM routes); not the long-term corporate apex app — see `IP_SANKOFA_PUBLIC_WEB` / `sync-sankofa-public-web-to-ct.sh` | | 7802 | 192.168.11.52 | sankofa-keycloak-1 | ✅ Running | Keycloak: 8080, Admin: /admin | Identity and Access Management | | 7803 | 192.168.11.53 | sankofa-postgres-1 | ✅ Running | PostgreSQL: 5432 | Database Service | @@ -346,8 +346,8 @@ The following VMIDs were the older `25xx` RPC identities before the `21xx/22xx/2 - `sankofa.nexus` / `www.sankofa.nexus` → **`IP_SANKOFA_PUBLIC_WEB`:`SANKOFA_PUBLIC_WEB_PORT** (canonical current target: **7806** `192.168.11.63:3000`). Fleet script: `scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh`. **`www`** → **301** → apex `https://sankofa.nexus` (`$request_uri`). ✅ - `portal.sankofa.nexus` / `admin.sankofa.nexus` → **`IP_SANKOFA_CLIENT_SSO`:`SANKOFA_CLIENT_SSO_PORT** (typical: 7801 `:3000`). NextAuth / OIDC public URL: **`https://portal.sankofa.nexus`**. ✅ when NPM proxy rows exist (fleet script creates/updates them). - `dash.sankofa.nexus` → Set **`IP_SANKOFA_DASH`** (+ `SANKOFA_DASH_PORT`) in `config/ip-addresses.conf` to enable upstream in the fleet script; IP allowlist at NPM is operator policy. 🔶 until dash app + env are set. -- `phoenix.sankofa.nexus` → Routes to `http://192.168.11.50:4000` (Phoenix API/VMID 7800) ✅ -- `www.phoenix.sankofa.nexus` → Same upstream; **301** to **`https://phoenix.sankofa.nexus`**. ✅ +- `phoenix.sankofa.nexus` → NPM upstream **`http://192.168.11.50:8080`** (Tier-1 API hub on VMID **7800**; WebSocket upgrades **on**). Apollo listens on **`127.0.0.1:4000`** only (not reachable from VLAN); hub proxies to loopback. 
✅ (2026-04-13 fleet + loopback bind) +- `www.phoenix.sankofa.nexus` → Same **:8080** upstream; **301** to **`https://phoenix.sankofa.nexus`**. ✅ - `the-order.sankofa.nexus` / `www.the-order.sankofa.nexus` → OSJ management portal (secure auth). App source: **the_order** at `~/projects/the_order`. NPMplus default upstream: **order-haproxy** `http://192.168.11.39:80` (VMID **10210**), which proxies to Sankofa portal `http://192.168.11.51:3000` (7801). Fallback: set `THE_ORDER_UPSTREAM_IP` / `THE_ORDER_UPSTREAM_PORT` to `.51` / `3000` if HAProxy is offline. **`www.the-order.sankofa.nexus`** → **301** **`https://the-order.sankofa.nexus`** (same as `www.sankofa` / `www.phoenix`). - `studio.sankofa.nexus` → Routes to `http://192.168.11.72:8000` (Sankofa Studio / VMID 7805; app-owned `/` → `/studio/` redirect) @@ -614,13 +614,14 @@ This section lists all endpoints that should be configured in NPMplus, extracted | `secure.mim4u.org` | `192.168.11.37` | `http` | `80` | ❌ No | MIM4U Secure Portal (VMID 7810) | | `training.mim4u.org` | `192.168.11.37` | `http` | `80` | ❌ No | MIM4U Training Portal (VMID 7810) | | **Sankofa Phoenix Services** | +| *(optional hub)* | **`IP_SANKOFA_WEB_HUB`** / **`IP_SANKOFA_PHOENIX_API_HUB`** (default in `config/ip-addresses.conf` = portal / Phoenix API until `.env` overrides) | `http` | per hub nginx | ❌ No | Consolidated non-chain web + path API hub — see `docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md` | | `sankofa.nexus` | **`IP_SANKOFA_PUBLIC_WEB`** (`192.168.11.63` on VMID 7806 in the current deployment) | `http` | **`SANKOFA_PUBLIC_WEB_PORT`** (`3000`) | ❌ No | Corporate apex; fleet script `update-npmplus-proxy-hosts-api.sh` | | `www.sankofa.nexus` | same as apex | `http` | same | ❌ No | **301** → `https://sankofa.nexus` | | `portal.sankofa.nexus` | **`IP_SANKOFA_CLIENT_SSO`** (typ. 
`.51` / 7801) | `http` | **`SANKOFA_CLIENT_SSO_PORT`** (`3000`) | ❌ No | Client SSO portal; `NEXTAUTH_URL=https://portal.sankofa.nexus` | | `admin.sankofa.nexus` | same as portal | `http` | same | ❌ No | Client access admin (same upstream until split) | | `dash.sankofa.nexus` | **`IP_SANKOFA_DASH`** (set in `ip-addresses.conf`) | `http` | **`SANKOFA_DASH_PORT`** | ❌ No | Operator dash — row omitted from fleet script until `IP_SANKOFA_DASH` set | -| `phoenix.sankofa.nexus` | `192.168.11.50` | `http` | `4000` | ❌ No | Phoenix API - Cloud Platform Portal (VMID 7800) ✅ **Deployed** | -| `www.phoenix.sankofa.nexus` | `192.168.11.50` | `http` | `4000` | ❌ No | Phoenix API (VMID 7800) ✅ **Deployed** | +| `phoenix.sankofa.nexus` | `192.168.11.50` | `http` | **`8080`** (Tier-1 **API hub** nginx; `/graphql`→**127.0.0.1:4000**, `/api`→dbis_core); **WebSocket: yes** | ❌ No | NPM fleet: `SANKOFA_NPM_PHOENIX_PORT=8080`; Apollo **not** on `0.0.0.0:4000` (loopback bind); break-glass: `pct exec 7800` → `curl http://127.0.0.1:4000/health` | +| `www.phoenix.sankofa.nexus` | `192.168.11.50` | `http` | **`8080`** | ❌ No | Same; **301** → apex HTTPS | | `the-order.sankofa.nexus`, `www.the-order.sankofa.nexus` | `192.168.11.39` (10210 HAProxy; default) or `192.168.11.51` (direct portal if env override) | `http` | `80` or `3000` | ❌ No | NPM → **.39:80** by default; HAProxy → **.51:3000** | | `studio.sankofa.nexus` | `192.168.11.72` | `http` | `8000` | ❌ No | Sankofa Studio (FusionAI Creator) — VMID 7805 | @@ -648,7 +649,7 @@ Some domains use path-based routing in NPM configs: | `sankofa.nexus`, `www.sankofa.nexus` | **Public web:** **7806**, 192.168.11.63:3000 (`IP_SANKOFA_PUBLIC_WEB`) | 192.168.11.140 (Blockscout) | | `portal.sankofa.nexus`, `admin.sankofa.nexus` | **7801**, 192.168.11.51:3000 (`IP_SANKOFA_CLIENT_SSO`) | 192.168.11.140 (Blockscout) | | `dash.sankofa.nexus` | Set **`IP_SANKOFA_DASH`** when operator dash exists | 192.168.11.140 (Blockscout) | -| `phoenix.sankofa.nexus`, 
`www.phoenix.sankofa.nexus` | 7800, 192.168.11.50:4000 | 192.168.11.140 (Blockscout) | +| `phoenix.sankofa.nexus`, `www.phoenix.sankofa.nexus` | **7800**, `192.168.11.50:8080` (NPM → hub); Apollo **:4000** on same CT behind hub | 192.168.11.140 (Blockscout) | | `the-order.sankofa.nexus`, `www.the-order.sankofa.nexus` | 10210, 192.168.11.39:80 | 192.168.11.140 (Blockscout) | | `studio.sankofa.nexus` | 7805, 192.168.11.72:8000 | — | diff --git a/docs/MASTER_INDEX.md b/docs/MASTER_INDEX.md index a1432c93..6c4bb90a 100644 --- a/docs/MASTER_INDEX.md +++ b/docs/MASTER_INDEX.md @@ -88,7 +88,9 @@ | **Sankofa / Phoenix public vs portal vs admin endpoints (fix list)** | [03-deployment/SANKOFA_PHOENIX_PUBLIC_PORTAL_ADMIN_ENDPOINT_CORRECTION_TASKS.md](03-deployment/SANKOFA_PHOENIX_PUBLIC_PORTAL_ADMIN_ENDPOINT_CORRECTION_TASKS.md) | — | | **Sankofa marketplace surfaces** (native vs partner offerings; IRU catalog vs portal SSO vs Studio landing) | [03-deployment/SANKOFA_MARKETPLACE_SURFACES.md](03-deployment/SANKOFA_MARKETPLACE_SURFACES.md) | — | | **Entity institutions** (Aseret, TAJ, Solace Bank Group — web/portal completion tracker) | [03-deployment/ENTITY_INSTITUTIONS_WEB_PORTAL_COMPLETION.md](03-deployment/ENTITY_INSTITUTIONS_WEB_PORTAL_COMPLETION.md) | Code: `~/projects/Aseret_Bank`, `~/projects/TAJ_PSFO/web`, `~/projects/Solace_Bank_Group/web`; static: [`solace-bank-group-portal/`](../solace-bank-group-portal/) | -| **Sankofa / Phoenix consolidated runtime** (single non-chain web hub + single API hub — resource model) | [02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md) | Examples + systemd: `config/nginx/sankofa-*.example.conf`, `config/systemd/sankofa-*-hub-nginx.service.example`, [`config/compose/sankofa-consolidated-runtime.example.yml`](../config/compose/sankofa-consolidated-runtime.example.yml); verify 
[`scripts/verify/check-sankofa-consolidated-nginx-examples.sh`](../scripts/verify/check-sankofa-consolidated-nginx-examples.sh); plan [`scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh`](../scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh) | +| **Non-chain ecosystem (hyperscaler-style cells, excl. blockchain plane)** | [02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md](02-architecture/NON_CHAIN_ECOSYSTEM_HYPERSCALER_STYLE_MODEL.md) | Edge, API hub, IdP, data cells; chain CTs stay separate | +| **Non-chain plan — gap analysis & backlog** | [02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md](02-architecture/NON_CHAIN_ECOSYSTEM_PLAN_REVIEW_AND_GAPS.md) | `TRUST_PROXY`, WebSockets, CORS, NPM vs ALB, `get_host_for_vmid`, dual-port exposure | +| **Sankofa / Phoenix consolidated runtime** (single non-chain web hub + single API hub — resource model) | [02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md](02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md); **r630-01 offload goal (phases + placement):** [03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md](03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md); **NPM hub cutover:** [03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md](03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md) | Examples + systemd: `config/nginx/sankofa-*.example.conf`, `config/systemd/sankofa-*-hub-nginx.service.example`, [`config/compose/sankofa-consolidated-runtime.example.yml`](../config/compose/sankofa-consolidated-runtime.example.yml); `bash scripts/verify/check-sankofa-consolidated-nginx-examples.sh`; `bash scripts/verify/verify-sankofa-consolidated-hub-lan.sh`; `bash scripts/verify/smoke-phoenix-api-hub-lan.sh`; **`pnpm run verify:phoenix-graphql-wss`** (HTTP 101 WS upgrade); **`pnpm run verify:phoenix-graphql-ws-subscription`** (`connection_ack`); 
[`scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh`](../scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh); [`scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh`](../scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh); [`scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh`](../scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh); [`scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh`](../scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh); [`scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh`](../scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh); plan [`scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh`](../scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh); **Apollo loopback on 7800:** [`scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh`](../scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh); **Firewall plan (read-only):** [`scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh`](../scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh); **API hub install (PVE):** [`scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh`](../scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh); **dbis `TRUST_PROXY` on CT:** [`scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh`](../scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh); CI: [`.github/workflows/validate-sankofa-nginx-examples.yml`](../.github/workflows/validate-sankofa-nginx-examples.yml) | | **IP conflict resolutions** | [reports/status/IP_CONFLICTS_RESOLUTION_COMPLETE.md](../reports/status/IP_CONFLICTS_RESOLUTION_COMPLETE.md), `scripts/resolve-ip-conflicts.sh` | — | | **Wormhole AI docs (LLM / MCP / RAG)** | 
[04-configuration/WORMHOLE_AI_RESOURCES_LLM_PLAYBOOK.md](04-configuration/WORMHOLE_AI_RESOURCES_LLM_PLAYBOOK.md), [04-configuration/WORMHOLE_AI_RESOURCES_RAG.md](04-configuration/WORMHOLE_AI_RESOURCES_RAG.md), `scripts/doc/sync-wormhole-ai-resources.sh`, `scripts/verify/verify-wormhole-ai-docs-setup.sh`, [`mcp-wormhole-docs/`](../mcp-wormhole-docs/) | Wormhole protocol reference only — not Chain 138 canonical addresses (use [11-references/EXPLORER_TOKEN_LIST_CROSSCHECK.md](11-references/EXPLORER_TOKEN_LIST_CROSSCHECK.md), CCIP runbooks for 138) | diff --git a/package.json b/package.json index 773b3159..f6b905e7 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,8 @@ "test:basic": "cd mcp-proxmox && node test-basic-tools.js", "test:workflows": "cd mcp-proxmox && node test-workflows.js", "verify:ws-chain138": "node scripts/verify-ws-rpc-chain138.mjs", + "verify:phoenix-graphql-wss": "bash scripts/verify/smoke-phoenix-graphql-wss-public.sh", + "verify:phoenix-graphql-ws-subscription": "node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs", "composer:dev": "pnpm --filter transaction-composer dev", "composer:build": "pnpm --filter transaction-composer build", "composer:test": "pnpm --filter transaction-composer test", diff --git a/scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh b/scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh new file mode 100755 index 00000000..584d3a0a --- /dev/null +++ b/scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Ensure TRUST_PROXY=1 exists in dbis_core API CT so req.ip / rate limits use X-Forwarded-For +# when traffic arrives via NPM or the Phoenix API hub nginx. 
+# +# Usage: +# bash scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh --dry-run --vmid 10150 +# PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=10150 bash scripts/deployment/ensure-dbis-api-trust-proxy-on-ct.sh --apply --vmid 10150 +# +# Mutations: appends lines to /opt/dbis-core/.env (backup first), restarts dbis-api.service. +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh" + +ENV_PATH="${DBIS_API_ENV_PATH:-/opt/dbis-core/.env}" +APPLY=false +DRY_RUN=false +VMID="${VMID_DBIS_API:-10150}" +SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new" + +while [[ $# -gt 0 ]]; do + case "$1" in + --apply) APPLY=true ;; + --dry-run) DRY_RUN=true ;; + --vmid) VMID="${2:?}"; shift ;; + *) echo "Unknown arg: $1" >&2; exit 2 ;; + esac + shift +done + +PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}" + +echo "=== ensure-dbis-api-trust-proxy-on-ct ===" +echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} env=${ENV_PATH}" +echo "" + +if $DRY_RUN || ! $APPLY; then + echo "[DRY-RUN] Would check ${ENV_PATH} on CT ${VMID}; append TRUST_PROXY=1 if missing; restart dbis-api." + # shellcheck disable=SC2029 + ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"if [[ ! -f '${ENV_PATH}' ]]; then echo '(missing ${ENV_PATH})'; exit 0; fi; if grep -qE '^(TRUST_PROXY|TRUST_PROXY_HOPS)=' '${ENV_PATH}' 2>/dev/null; then grep -E '^(TRUST_PROXY|TRUST_PROXY_HOPS)=' '${ENV_PATH}' | sed 's/=.*/=/'; else echo '(no TRUST_PROXY / TRUST_PROXY_HOPS lines yet)'; fi\"" + echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}" + exit 0 +fi + +if ! pguard_require_apply_flag true; then + echo "Refused: set PROXMOX_OPS_APPLY=1" >&2 + exit 3 +fi +if ! 
pguard_vmid_allowed "$VMID"; then + exit 3 +fi + +WORKDIR="$(mktemp -d)" +trap 'rm -rf "$WORKDIR"' EXIT +REMOTE_SH="${WORKDIR}/remote.sh" +{ + printf 'ENV_PATH=%q\n' "$ENV_PATH" + cat <<'EOS' +set -euo pipefail +if [[ ! -f "$ENV_PATH" ]]; then + echo "ERROR: missing $ENV_PATH" >&2 + exit 2 +fi +if grep -qE '^[[:space:]]*TRUST_PROXY[[:space:]]*=[[:space:]]*(1|true|yes)' "$ENV_PATH"; then + echo "OK: TRUST_PROXY already enabled" + exit 0 +fi +cp -a "$ENV_PATH" "${ENV_PATH}.bak.ensure-trust-proxy-$(date +%Y%m%d%H%M%S)" +{ + echo "" + echo "# Added by ensure-dbis-api-trust-proxy-on-ct.sh — NPM / Phoenix API hub" + echo "TRUST_PROXY=1" +} >>"$ENV_PATH" +systemctl restart dbis-api.service +systemctl is-active dbis-api.service +echo "OK: appended TRUST_PROXY=1 and restarted dbis-api" +EOS +} >"$REMOTE_SH" + +ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH" diff --git a/scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh b/scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh new file mode 100755 index 00000000..c63116bf --- /dev/null +++ b/scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Optional: load nftables guard on Phoenix CT (7800) — reject TCP dport 4000 from non-loopback. +# See config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft +# +# Usage: +# bash scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh --dry-run --vmid 7800 +# PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-7800-nft-dport-4000-guard.sh --apply --vmid 7800 +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh" + +NFT_SRC="${PROJECT_ROOT}/config/nftables/sankofa-phoenix-7800-guard-dport-4000.nft" +REMOTE_NFT="/tmp/sankofa-phoenix-7800-guard-dport-4000.nft" +APPLY=false +DRY_RUN=false +VMID="${SANKOFA_PHOENIX_VMID:-7800}" +SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new" + +while [[ $# -gt 0 ]]; do + case "$1" in + --apply) APPLY=true ;; + --dry-run) DRY_RUN=true ;; + --vmid) VMID="${2:?}"; shift ;; + *) echo "Unknown arg: $1" >&2; exit 2 ;; + esac + shift +done + +[[ -f "$NFT_SRC" ]] || { echo "ERROR: missing $NFT_SRC" >&2; exit 2; } + +PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}" + +echo "=== ensure-sankofa-phoenix-7800-nft-dport-4000-guard ===" +echo "PVE: root@${PROXMOX_HOST} VMID=${VMID}" +echo "" + +if $DRY_RUN || ! $APPLY; then + echo "[DRY-RUN] Ruleset: ${NFT_SRC}" + if command -v nft >/dev/null 2>&1; then + nft -c -f "$NFT_SRC" && echo "OK: nft -c syntax (operator host)" + else + echo "SKIP: install nftables on this workstation for local syntax check, or rely on PVE after --apply." + fi + echo "On --apply: scp ruleset to PVE → pct push into CT → ${REMOTE_NFT}; pct exec nft -f (idempotent if table exists)." + echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}" + exit 0 +fi + +if ! pguard_require_apply_flag true; then + echo "Refused: set PROXMOX_OPS_APPLY=1" >&2 + exit 3 +fi +if ! pguard_vmid_allowed "$VMID"; then + exit 3 +fi + +# pct push reads the source file from the *PVE host* filesystem, not the operator workstation. 
+PVE_STAGING="/tmp/sankofa-phoenix-7800-guard-dport-4000-from-operator.nft" +scp $SSH_OPTS "$NFT_SRC" "root@${PROXMOX_HOST}:${PVE_STAGING}" +ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct push ${VMID} ${PVE_STAGING} ${REMOTE_NFT} && rm -f ${PVE_STAGING}" +ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc ' + set -euo pipefail + if nft list table inet sankofa_phoenix_guard >/dev/null 2>&1; then + echo OK: table inet sankofa_phoenix_guard already present \(skip load\) + nft list table inet sankofa_phoenix_guard + exit 0 + fi + nft -f ${REMOTE_NFT} + echo OK: nft rules loaded + nft list table inet sankofa_phoenix_guard +'" + +echo "" +echo "Remove guard: pct exec ${VMID} -- nft delete table inet sankofa_phoenix_guard" diff --git a/scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh b/scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh new file mode 100755 index 00000000..b43e3e70 --- /dev/null +++ b/scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash +# Run sankofa-api DB migrations (pnpm db:migrate:up) on CT 7800 against DB_* from /opt/sankofa-api/.env. +# Uses Python to load .env so JWT_SECRET and other values with shell metacharacters do not break `source`. +# +# Prerequisites: DB_HOST/DB_USER/DB_PASSWORD/DB_NAME correct; Postgres reachable (e.g. VMID 7803). +# +# Usage: +# bash scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh --dry-run --vmid 7800 +# PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-api-db-migrate-up-7800.sh --apply --vmid 7800 +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh" + +API_ROOT="${SANKOFA_PHOENIX_API_ROOT:-/opt/sankofa-api}" +APPLY=false +DRY_RUN=false +VMID="${SANKOFA_PHOENIX_VMID:-7800}" +SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new" + +while [[ $# -gt 0 ]]; do + case "$1" in + --apply) APPLY=true ;; + --dry-run) DRY_RUN=true ;; + --vmid) VMID="${2:?}"; shift ;; + *) echo "Unknown arg: $1" >&2; exit 2 ;; + esac + shift +done + +PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}" + +echo "=== ensure-sankofa-phoenix-api-db-migrate-up-7800 ===" +echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} dir=${API_ROOT}" +echo "" + +if $DRY_RUN || ! $APPLY; then + # shellcheck disable=SC2029 + ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \" + [[ -d '${API_ROOT}' ]] || { echo 'missing ${API_ROOT}'; exit 0; } + test -f '${API_ROOT}/package.json' && grep -E 'db:migrate' '${API_ROOT}/package.json' | head -3 + \"" + echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}" + exit 0 +fi + +if ! pguard_require_apply_flag true; then + echo "Refused: set PROXMOX_OPS_APPLY=1" >&2 + exit 3 +fi +if ! 
pguard_vmid_allowed "$VMID"; then + exit 3 +fi + +WORKDIR="$(mktemp -d)" +trap 'rm -rf "$WORKDIR"' EXIT +REMOTE_SH="${WORKDIR}/remote.sh" +{ + printf 'export API_ROOT=%q\n' "$API_ROOT" + cat <<'EOS' +set -euo pipefail +cd "$API_ROOT" +python3 <<'PY' +import os, subprocess +from pathlib import Path +env = dict(os.environ) +p = Path(".env") +if not p.is_file(): + raise SystemExit("ERROR: missing .env") +for line in p.read_text().splitlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + if "=" not in line: + continue + k, v = line.split("=", 1) + env[k] = v +r = subprocess.run(["pnpm", "run", "db:migrate:up"], cwd=str(Path.cwd()), env=env) +raise SystemExit(r.returncode) +PY +echo "OK: db:migrate:up finished" +EOS +} >"$REMOTE_SH" + +ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH" + +echo "" +echo "Optional: systemctl restart sankofa-api (if schema changed compatibility)." diff --git a/scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh b/scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh new file mode 100755 index 00000000..185d19b8 --- /dev/null +++ b/scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh @@ -0,0 +1,169 @@ +#!/usr/bin/env bash +# LAN parity for Phoenix sankofa-api (CT 7800) /opt/sankofa-api/.env: +# - Normalize NODE_ENV: strip all NODE_ENV= lines, then append NODE_ENV= (default +# **development**) so dotenv matches relaxed secret-validation / TLS behavior. Use +# PHOENIX_API_NODE_ENV=production **only** when DB_PASSWORD and KEYCLOAK_CLIENT_SECRET are each 32+ +# chars (see src/lib/secret-validation.ts) and TLS is provisioned or tls-config terminate-at-edge patch applied. +# - Point DB_HOST at Sankofa Postgres (VMID 7803, default 192.168.11.53) instead of localhost. +# - Point KEYCLOAK_URL at Keycloak LXC (default IP_KEYCLOAK :8080). +# - Drop duplicate DB_PASSWORD lines (keeps first occurrence). 
+# - Append TERMINATE_TLS_AT_EDGE=1 when missing (with ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh). +# Optional: if operator shell has KEYCLOAK_CLIENT_SECRET set, pass MERGE_KEYCLOAK_SECRET=1 to replace +# placeholder on CT (never logs the secret). +# +# Usage: +# source scripts/lib/load-project-env.sh +# bash scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh --dry-run --vmid 7800 +# PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-api-env-lan-parity-7800.sh --apply --vmid 7800 +# MERGE_KEYCLOAK_SECRET=1 KEYCLOAK_CLIENT_SECRET=... PROXMOX_OPS_APPLY=1 ... --apply --vmid 7800 +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh" + +ENV_PATH="${SANKOFA_PHOENIX_ENV_PATH:-/opt/sankofa-api/.env}" +APPLY=false +DRY_RUN=false +VMID="${SANKOFA_PHOENIX_VMID:-7800}" +SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new" + +KC_IP="${IP_KEYCLOAK:-192.168.11.52}" +PG_HOST="${PHOENIX_DB_HOST:-${DB_HOST:-192.168.11.53}}" +KEYCLOAK_URL_EFFECTIVE="${PHOENIX_KEYCLOAK_URL:-http://${KC_IP}:8080}" +MERGE_KC_SECRET="${MERGE_KEYCLOAK_SECRET:-0}" + +while [[ $# -gt 0 ]]; do + case "$1" in + --apply) APPLY=true ;; + --dry-run) DRY_RUN=true ;; + --vmid) VMID="${2:?}"; shift ;; + *) echo "Unknown arg: $1" >&2; exit 2 ;; + esac + shift +done + +PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}" + +echo "=== ensure-sankofa-phoenix-api-env-lan-parity-7800 ===" +echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} env=${ENV_PATH}" +echo "Planned: DB_HOST=${PG_HOST} KEYCLOAK_URL=${KEYCLOAK_URL_EFFECTIVE}" +echo "NODE_ENV: strip duplicates, then append NODE_ENV=${PHOENIX_API_NODE_ENV:-development} (override with 
PHOENIX_API_NODE_ENV=production when secrets meet policy)." +echo "" + +if $DRY_RUN || ! $APPLY; then + # shellcheck disable=SC2029 + ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \" + [[ -f '${ENV_PATH}' ]] || { echo 'missing ${ENV_PATH}'; exit 0; } + echo '--- current NODE_ENV / DB_HOST / KEYCLOAK_URL / DB_PASSWORD count:' + grep -nE '^(NODE_ENV|DB_HOST|KEYCLOAK_URL|DB_PASSWORD)=' '${ENV_PATH}' || true + \"" + echo "Optional: MERGE_KEYCLOAK_SECRET=1 with KEYCLOAK_CLIENT_SECRET in shell to replace placeholder on CT." + echo "If PHOENIX_API_NODE_ENV=production: run ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh first (no local TLS) and use 32+ char DB_PASSWORD and KEYCLOAK_CLIENT_SECRET." + echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}" + exit 0 +fi + +if ! pguard_require_apply_flag true; then + echo "Refused: set PROXMOX_OPS_APPLY=1" >&2 + exit 3 +fi +if ! pguard_vmid_allowed "$VMID"; then + exit 3 +fi + +WORKDIR="$(mktemp -d)" +trap 'rm -rf "$WORKDIR"' EXIT +REMOTE_SH="${WORKDIR}/remote.sh" +{ + printf 'export ENV_PATH=%q\n' "$ENV_PATH" + printf 'export PG_HOST=%q\n' "$PG_HOST" + printf 'export KEYCLOAK_URL_EFFECTIVE=%q\n' "$KEYCLOAK_URL_EFFECTIVE" + printf 'export MERGE_KC_SECRET=%q\n' "${MERGE_KEYCLOAK_SECRET:-0}" + printf 'export KC_SECRET=%q\n' "${KEYCLOAK_CLIENT_SECRET:-}" + printf 'export PHOENIX_API_NODE_ENV=%q\n' "${PHOENIX_API_NODE_ENV:-development}" + printf 'export PHOENIX_DB_USER=%q\n' "${PHOENIX_DB_USER:-sankofa}" + printf 'export PHOENIX_DB_NAME=%q\n' "${PHOENIX_DB_NAME:-sankofa}" + cat <<'EOS' +set -euo pipefail +if [[ ! 
#!/usr/bin/env bash
# Idempotent: align hub nginx location /graphql-ws on CT 7800 with install-sankofa-api-hub-nginx-on-pve.sh:
# Accept-Encoding cleared, proxy_buffering off, X-Real-IP / X-Forwarded-* (for TRUST_PROXY / logging).
#
# Exit codes: 0 ok/no-op, 2 usage error, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800.sh --apply --vmid 7800
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

SITE_CONF="${SANKOFA_PHOENIX_HUB_SITE_CONF:-/etc/sankofa-phoenix-api-hub/conf.d/site.conf}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-api-hub-graphql-ws-proxy-headers-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} conf=${SITE_CONF}"
echo ""

# Default is read-only: anything short of an explicit --apply prints state and exits.
if $DRY_RUN || ! $APPLY; then
  echo "[DRY-RUN] Would ensure graphql-ws block has WS proxy + forwarded client headers."
  # shellcheck disable=SC2029
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    if [[ ! -f '${SITE_CONF}' ]]; then echo 'missing ${SITE_CONF}'; exit 0; fi
    awk '/location \\/graphql-ws/,/^ }/' '${SITE_CONF}' | head -30
  \""
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — they gate mutations behind
# PROXMOX_OPS_APPLY=1 and an allow-list of VMIDs.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# Build the remote script: %q-quoted exports first so the quoted heredoc below
# can stay expansion-free.
{
  printf 'export SITE_CONF=%q\n' "$SITE_CONF"
  cat <<'EOS'
set -euo pipefail
if [[ ! -f "$SITE_CONF" ]]; then
  echo "ERROR: missing $SITE_CONF (install hub first?)" >&2
  exit 2
fi
rc=0
python3 <<'PY' || rc=$?
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path

p = Path(os.environ["SITE_CONF"])
t = p.read_text()
if "location /graphql-ws" not in t:
    print("ERROR: no location /graphql-ws in site.conf", file=sys.stderr)
    sys.exit(2)

# Assumes the graphql-ws location contains no nested braces (true for the
# installer-generated block) — [^}] would stop early otherwise.
m = re.search(r"location /graphql-ws \{([^}]*)\}", t, flags=re.DOTALL)
if not m:
    print("ERROR: could not parse graphql-ws block", file=sys.stderr)
    sys.exit(2)
orig_block = m.group(1)
block = orig_block

# NOTE(review): the literal indentation below must byte-match the deployed
# site.conf produced by install-sankofa-api-hub-nginx-on-pve.sh — confirm.
conn = '        proxy_set_header Connection "upgrade";\n'
extra = (
    '        proxy_set_header Connection "upgrade";\n'
    '        proxy_set_header Accept-Encoding "";\n'
    '        proxy_buffering off;\n'
)
if 'proxy_set_header Accept-Encoding ""' not in block or "proxy_buffering off" not in block:
    if conn not in block:
        print("ERROR: expected Connection upgrade line not found in graphql-ws block", file=sys.stderr)
        sys.exit(2)
    block = block.replace(conn, extra, 1)

host_line = '        proxy_set_header Host $host;\n'
xfwd = (
    "        proxy_set_header X-Real-IP $remote_addr;\n"
    "        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n"
    "        proxy_set_header X-Forwarded-Proto $scheme;\n"
)
if host_line in block and "proxy_set_header X-Real-IP" not in block:
    block = block.replace(host_line, host_line + xfwd, 1)

if block == orig_block:
    print("OK: graphql-ws block already complete")
    sys.exit(0)

# datetime.utcnow() is deprecated (Python 3.12+); timezone-aware UTC gives the
# same %Y%m%d%H%M%S digits for the backup suffix.
bak = p.with_name(p.name + ".bak.ws-proxy-" + datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S"))
bak.write_text(t)
t2 = t[: m.start(1)] + block + t[m.end(1) :]
p.write_text(t2)
print("OK: patched graphql-ws block (backup " + bak.name + ")")
# Sentinel 10 = "file changed, reload needed"; 0 = already complete.
sys.exit(10)
PY
if [[ "$rc" -eq 10 ]]; then
  nginx -t -c /etc/sankofa-phoenix-api-hub/nginx.conf
  # Prefer a zero-downtime reload; fall back to a full unit restart.
  if /usr/sbin/nginx -s reload -c /etc/sankofa-phoenix-api-hub/nginx.conf 2>/dev/null; then
    echo "OK: hub nginx reloaded"
  else
    systemctl restart sankofa-phoenix-api-hub.service
    systemctl is-active sankofa-phoenix-api-hub.service
    echo "OK: hub nginx restarted"
  fi
elif [[ "$rc" -eq 0 ]]; then
  echo "OK: hub nginx unchanged (already had headers)"
else
  exit "$rc"
fi
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"

echo ""
echo "Verify: bash scripts/verify/smoke-phoenix-graphql-wss-public.sh"
#!/usr/bin/env bash
# Add ExecReload to sankofa-phoenix-api-hub.service on CT 7800 if missing (matches install template).
#
# Exit codes: 0 ok/no-op, 2 usage error or missing unit file, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800.sh --apply --vmid 7800
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

UNIT="${SANKOFA_PHOENIX_HUB_UNIT:-/etc/systemd/system/sankofa-phoenix-api-hub.service}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-api-hub-systemd-exec-reload-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} unit=${UNIT}"
echo ""

# Default is read-only: show current ExecReload state and exit.
if $DRY_RUN || ! $APPLY; then
  # shellcheck disable=SC2029
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    [[ -f '${UNIT}' ]] || { echo 'missing ${UNIT}'; exit 0; }
    grep -n ExecReload '${UNIT}' || echo '(no ExecReload yet)'
  \""
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — mutation gate.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# %q-quoted export first so the quoted heredoc stays expansion-free.
{
  printf 'export UNIT=%q\n' "$UNIT"
  cat <<'EOS'
set -euo pipefail
if [[ ! -f "$UNIT" ]]; then
  echo "ERROR: missing $UNIT" >&2
  exit 2
fi
if grep -q '^ExecReload=' "$UNIT"; then
  echo "OK: ExecReload already present"
  exit 0
fi
# Timestamped backup before editing the unit in place.
cp -a "$UNIT" "${UNIT}.bak.execreload-$(date +%Y%m%d%H%M%S)"
python3 <<'PY'
import os
import re
from pathlib import Path

unit = Path(os.environ["UNIT"])
text = unit.read_text()
# Re-check inside Python in case the file changed between grep and here.
if re.search(r"^ExecReload=", text, flags=re.MULTILINE):
    raise SystemExit(0)
m = re.search(r"^(ExecStart=.*)$", text, flags=re.MULTILINE)
if not m:
    # SystemExit with a string prints it to stderr and exits non-zero.
    raise SystemExit("ERROR: ExecStart= not found in unit")
# Insert ExecReload directly after ExecStart, matching the install template.
insert = m.group(1) + "\nExecReload=/usr/sbin/nginx -s reload -c /etc/sankofa-phoenix-api-hub/nginx.conf"
unit.write_text(text.replace(m.group(1), insert, 1))
print("OK: inserted ExecReload after ExecStart=")
PY
systemctl daemon-reload
echo "OK: daemon-reloaded (reload hub when needed: systemctl reload sankofa-phoenix-api-hub)"
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"
#!/usr/bin/env bash
# Bind Sankofa Phoenix (Fastify) Apollo to loopback only: HOST=127.0.0.1 in /opt/sankofa-api/.env
# so :4000 is not reachable from VLAN. Requires Tier-1 API hub nginx upstream **127.0.0.1:4000**
# (default in install-sankofa-api-hub-nginx-on-pve.sh).
#
# Exit codes: 0 ok/no-op, 2 usage error or precondition failure, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh --apply --vmid 7800
#
# Mutations: edits /opt/sankofa-api/.env (backup), restarts sankofa-api.service.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

ENV_PATH="${SANKOFA_PHOENIX_ENV_PATH:-/opt/sankofa-api/.env}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-apollo-bind-loopback-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} env=${ENV_PATH}"
echo ""

# Default is read-only: show current HOST= line, the :4000 listener, and the
# hub upstream so the operator can confirm preconditions before --apply.
if $DRY_RUN || ! $APPLY; then
  echo "[DRY-RUN] Would set HOST=127.0.0.1 in ${ENV_PATH} and restart sankofa-api.service."
  # shellcheck disable=SC2029
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    set -e
    if [[ ! -f '${ENV_PATH}' ]]; then echo '(missing ${ENV_PATH})'; exit 0; fi
    echo '--- HOST lines (redact values):'
    grep -E '^HOST=' '${ENV_PATH}' 2>/dev/null | sed 's/=.*/=/' || echo '(no HOST= line)'
    echo '--- :4000 listener:'
    command -v ss >/dev/null && ss -tlnp | grep ':4000' || true
    echo '--- hub upstream (expect 127.0.0.1:4000):'
    grep -A1 'upstream sankofa_phoenix_graphql' /etc/sankofa-phoenix-api-hub/conf.d/site.conf 2>/dev/null | head -3 || echo '(no hub conf)'
  \""
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — mutation gate.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# Plain (non-exported) assignment is enough: ENV_PATH is only read by the same
# remote bash, not by a child interpreter.
{
  printf 'ENV_PATH=%q\n' "$ENV_PATH"
  cat <<'EOS'
set -euo pipefail
# Safety interlock: refuse to cut off VLAN access to :4000 unless the hub is
# already proxying to loopback — otherwise the API would become unreachable.
HUB_CONF="/etc/sankofa-phoenix-api-hub/conf.d/site.conf"
if [[ -f "$HUB_CONF" ]] && ! grep -q "server 127.0.0.1:4000" "$HUB_CONF" 2>/dev/null; then
  echo "ERROR: hub nginx must proxy Phoenix to 127.0.0.1:4000 (found other upstream in $HUB_CONF). Fix hub first." >&2
  exit 2
fi
if [[ ! -f "$ENV_PATH" ]]; then
  echo "ERROR: missing $ENV_PATH" >&2
  exit 2
fi
if grep -qE '^[[:space:]]*HOST[[:space:]]*=[[:space:]]*127\.0\.0\.1' "$ENV_PATH"; then
  echo "OK: HOST=127.0.0.1 already set"
  exit 0
fi
# Timestamped backup, then rewrite an existing HOST= line or append one.
cp -a "$ENV_PATH" "${ENV_PATH}.bak.loopback-$(date +%Y%m%d%H%M%S)"
if grep -qE '^[[:space:]]*HOST[[:space:]]*=' "$ENV_PATH"; then
  sed -i -E 's/^[[:space:]]*HOST[[:space:]]*=.*/HOST=127.0.0.1/' "$ENV_PATH"
else
  {
    echo ""
    echo "# Added by ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh — VLAN cannot reach :4000; use hub :8080"
    echo "HOST=127.0.0.1"
  } >>"$ENV_PATH"
fi
systemctl restart sankofa-api.service
systemctl is-active sankofa-api.service
echo "OK: HOST=127.0.0.1 and sankofa-api restarted"
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"

echo ""
echo "Post-check from operator (LAN): hub :8080 /health and GraphQL should still work; direct http://:4000 should refuse from other hosts."
#!/usr/bin/env bash
# graphql-ws uses a standalone ws WebSocketServer on fastify.server. @fastify/websocket also
# registers an 'upgrade' listener first; with no websocket routes it still races Fastify routing
# against graphql-ws and can yield broken frames (clients: "Invalid WebSocket frame: RSV1 must be clear").
# This script removes the unused plugin import + register from server.ts on CT 7800.
#
# Complements: ensure-sankofa-phoenix-ws-disable-permessage-deflate-7800.sh (perMessageDeflate on wss).
#
# Exit codes: 0 ok/no-op, 2 usage error or missing file, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh --apply --vmid 7800
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

SERVER_TS="${SANKOFA_PHOENIX_SERVER_TS:-/opt/sankofa-api/src/server.ts}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} file=${SERVER_TS}"
echo ""

# Default is read-only: list any remaining fastifyWebsocket references.
if $DRY_RUN || ! $APPLY; then
  echo "[DRY-RUN] Would remove @fastify/websocket import + register from ${SERVER_TS} if still present."
  # shellcheck disable=SC2029
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    if [[ ! -f '${SERVER_TS}' ]]; then echo 'missing ${SERVER_TS}'; exit 0; fi
    grep -n 'fastifyWebsocket\\|@fastify/websocket' '${SERVER_TS}' || echo '(no fastifyWebsocket references)'
  \""
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — mutation gate.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# %q-quoted export first so the quoted heredoc stays expansion-free.
{
  printf 'export TARGET_TS=%q\n' "$SERVER_TS"
  cat <<'EOS'
set -euo pipefail
if [[ ! -f "$TARGET_TS" ]]; then
  echo "ERROR: missing $TARGET_TS" >&2
  exit 2
fi
if ! grep -q "fastifyWebsocket" "$TARGET_TS"; then
  echo "OK: @fastify/websocket already removed from server.ts"
  exit 0
fi
# Timestamped backup before editing the source file in place.
cp -a "$TARGET_TS" "${TARGET_TS}.bak.no-fastify-ws-$(date +%Y%m%d%H%M%S)"
python3 <<'PY'
from pathlib import Path
import os
import re

p = Path(os.environ["TARGET_TS"])
t = p.read_text()
orig = t
# Drop import line (with optional CRLF)
t = re.sub(r"^import fastifyWebsocket from '@fastify/websocket'\r?\n", "", t, flags=re.MULTILINE)
# Drop register block (comment + register)
t = re.sub(
    r"\n[ \t]*// Register WebSocket support\r?\n[ \t]*await fastify\.register\(fastifyWebsocket\)\r?\n",
    "\n",
    t,
    count=1,
)
# Fail loudly instead of silently rewriting nothing if the file drifted.
if t == orig:
    raise SystemExit("ERROR: expected patterns not found (server.ts layout changed?)")
p.write_text(t)
print("OK: patched server.ts (removed @fastify/websocket)")
PY
systemctl restart sankofa-api.service
systemctl is-active sankofa-api.service
echo "OK: restarted sankofa-api"
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"

echo ""
echo "Next: node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs"
#!/usr/bin/env bash
# Apollo (Fastify) uses tls-config.ts: in NODE_ENV=production it throws if certs are missing.
# Behind NPM + hub, TLS terminates at the edge; Apollo stays HTTP on 127.0.0.1:4000.
# This patch allows production when TERMINATE_TLS_AT_EDGE=1 (see ensure-sankofa-phoenix-api-env-lan-parity-7800.sh).
#
# Patches /opt/sankofa-api/src/lib/tls-config.ts on CT 7800 (backup), restarts sankofa-api.
#
# Exit codes: 0 ok/no-op, 2 usage error or missing file, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800.sh --apply --vmid 7800
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

TLS_TS="${SANKOFA_PHOENIX_TLS_CONFIG_TS:-/opt/sankofa-api/src/lib/tls-config.ts}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-tls-config-terminate-at-edge-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} file=${TLS_TS}"
echo ""

# Default is read-only: show whether the patch marker is already present.
if $DRY_RUN || ! $APPLY; then
  # shellcheck disable=SC2029
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    [[ -f '${TLS_TS}' ]] || { echo 'missing ${TLS_TS}'; exit 0; }
    grep -n TERMINATE_TLS_AT_EDGE '${TLS_TS}' || echo '(not patched yet)'
  \""
  echo "Requires TERMINATE_TLS_AT_EDGE=1 in /opt/sankofa-api/.env (lan-parity script adds it)."
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — mutation gate.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# %q-quoted export first so the quoted heredoc stays expansion-free.
{
  printf 'export TARGET_TS=%q\n' "$TLS_TS"
  cat <<'EOS'
set -euo pipefail
if [[ ! -f "$TARGET_TS" ]]; then
  echo "ERROR: missing $TARGET_TS" >&2
  exit 2
fi
# Idempotence marker: the env-var name only appears once the patch is applied.
if grep -q 'TERMINATE_TLS_AT_EDGE' "$TARGET_TS"; then
  echo "OK: tls-config already patched"
  exit 0
fi
cp -a "$TARGET_TS" "${TARGET_TS}.bak.terminate-at-edge-$(date +%Y%m%d%H%M%S)"
python3 <<'PY'
from pathlib import Path
import os
p = Path(os.environ["TARGET_TS"])
t = p.read_text()
# NOTE(review): these literals must byte-match the deployed tls-config.ts,
# including indentation — confirm against the file on CT 7800 before relying
# on this in other environments.
old1 = """  if (!fs.existsSync(certPath)) {
    if (process.env.NODE_ENV === 'production') {
      throw new Error(`TLS certificate not found: ${certPath}`)
    }
    logger.warn(`TLS certificate not found: ${certPath} - using HTTP only`)
  }"""
new1 = """  if (!fs.existsSync(certPath)) {
    if (process.env.NODE_ENV === 'production' && process.env.TERMINATE_TLS_AT_EDGE !== '1') {
      throw new Error(`TLS certificate not found: ${certPath}`)
    }
    logger.warn(`TLS certificate not found: ${certPath} - using HTTP only`)
  }"""
old2 = """  if (!fs.existsSync(keyPath)) {
    if (process.env.NODE_ENV === 'production') {
      throw new Error(`TLS key not found: ${keyPath}`)
    }
    logger.warn(`TLS key not found: ${keyPath} - using HTTP only`)
  }"""
new2 = """  if (!fs.existsSync(keyPath)) {
    if (process.env.NODE_ENV === 'production' && process.env.TERMINATE_TLS_AT_EDGE !== '1') {
      throw new Error(`TLS key not found: ${keyPath}`)
    }
    logger.warn(`TLS key not found: ${keyPath} - using HTTP only`)
  }"""
# Require both anchors so a partial/drifted file fails instead of half-patching.
if old1 not in t or old2 not in t:
    raise SystemExit("ERROR: tls-config.ts pattern not found (file changed?)")
p.write_text(t.replace(old1, new1, 1).replace(old2, new2, 1))
print("OK: patched tls-config.ts")
PY
systemctl restart sankofa-api.service
sleep 2
systemctl is-active sankofa-api.service
echo "OK: restarted sankofa-api"
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"
#!/usr/bin/env bash
# websocket.ts uses logger in onDisconnect/onError but must import it; otherwise disconnects
# crash the whole process (ReferenceError: logger is not defined) → 502 on /graphql-ws.
#
# Patches /opt/sankofa-api/src/services/websocket.ts on CT 7800 (backup), restarts sankofa-api.
#
# Exit codes: 0 ok/no-op, 2 usage error or missing file, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-websocket-ts-import-logger-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-websocket-ts-import-logger-7800.sh --apply --vmid 7800
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

WS_TS="${SANKOFA_PHOENIX_WEBSOCKET_TS:-/opt/sankofa-api/src/services/websocket.ts}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-websocket-ts-import-logger-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} file=${WS_TS}"
echo ""

# Default is read-only: show the file head and whether the import exists.
if $DRY_RUN || ! $APPLY; then
  echo "[DRY-RUN] Would ensure import { logger } from '../lib/logger' in ${WS_TS}."
  # shellcheck disable=SC2029
  # BUGFIX: the pattern must not be wrapped in \"…\" here — that closes the
  # remote bash -lc \"…\" argument early on the PVE side and breaks the grep.
  # The pattern has no whitespace, so a fixed-string grep needs no quoting.
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    if [[ ! -f '${WS_TS}' ]]; then echo 'missing ${WS_TS}'; exit 0; fi
    head -15 '${WS_TS}' | sed 's/^/  /'
    grep -nF -- ../lib/logger '${WS_TS}' || echo '(no logger import yet)'
  \""
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — mutation gate.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# %q-quoted export first so the quoted heredoc stays expansion-free.
{
  printf 'export TARGET_TS=%q\n' "$WS_TS"
  cat <<'EOS'
set -euo pipefail
if [[ ! -f "$TARGET_TS" ]]; then
  echo "ERROR: missing $TARGET_TS" >&2
  exit 2
fi
if grep -q "from '../lib/logger'" "$TARGET_TS"; then
  echo "OK: logger import already present"
  exit 0
fi
# Timestamped backup before editing the source file in place.
cp -a "$TARGET_TS" "${TARGET_TS}.bak.logger-import-$(date +%Y%m%d%H%M%S)"
python3 <<'PY'
from pathlib import Path
import os
p = Path(os.environ["TARGET_TS"])
t = p.read_text()
# Anchor on the existing FastifyRequest import and insert the logger import
# directly after it.
needle = "import { FastifyRequest } from 'fastify'\n"
ins = needle + "\nimport { logger } from '../lib/logger'\n"
if needle not in t:
    raise SystemExit("ERROR: FastifyRequest import anchor not found")
if "from '../lib/logger'" in t:
    raise SystemExit(0)
p.write_text(t.replace(needle, ins, 1))
print("OK: inserted logger import")
PY
systemctl restart sankofa-api.service
systemctl is-active sankofa-api.service
echo "OK: restarted sankofa-api"
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"

echo ""
echo "Verify: node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs"
#!/usr/bin/env bash
# Disable permessage-deflate on Phoenix graphql-ws (WebSocketServer in ws). If clients still
# see "RSV1 must be clear", remove the unused @fastify/websocket upgrade listener:
# ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh
#
# Patches /opt/sankofa-api/src/services/websocket.ts on CT 7800 (backup first), restarts sankofa-api.
#
# Exit codes: 0 ok/no-op, 2 usage error or missing file, 3 production-guard refusal.
#
# Usage:
#   bash scripts/deployment/ensure-sankofa-phoenix-ws-disable-permessage-deflate-7800.sh --dry-run --vmid 7800
#   PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 bash scripts/deployment/ensure-sankofa-phoenix-ws-disable-permessage-deflate-7800.sh --apply --vmid 7800
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh"

WS_TS="${SANKOFA_PHOENIX_WEBSOCKET_TS:-/opt/sankofa-api/src/services/websocket.ts}"
APPLY=false
DRY_RUN=false
VMID="${SANKOFA_PHOENIX_VMID:-7800}"
# Intentionally word-split when passed to ssh; do not quote.
SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --apply) APPLY=true ;;
    --dry-run) DRY_RUN=true ;;
    --vmid) VMID="${2:?}"; shift ;;
    *) echo "Unknown arg: $1" >&2; exit 2 ;;
  esac
  shift
done

# get_host_for_vmid is provided by the sourced project libs (maps VMID -> PVE node).
PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}"

echo "=== ensure-sankofa-phoenix-ws-disable-permessage-deflate-7800 ==="
echo "PVE: root@${PROXMOX_HOST} VMID=${VMID} file=${WS_TS}"
echo ""

# Default is read-only: show any perMessageDeflate lines plus the surrounding
# WebSocketServer construction for context.
if $DRY_RUN || ! $APPLY; then
  echo "[DRY-RUN] Would inject perMessageDeflate: false into WebSocketServer options if missing."
  # shellcheck disable=SC2029
  ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -lc \"
    if [[ ! -f '${WS_TS}' ]]; then echo 'missing ${WS_TS}'; exit 0; fi
    if grep -q 'perMessageDeflate' '${WS_TS}'; then grep -n 'perMessageDeflate' '${WS_TS}' | head -5; else echo '(no perMessageDeflate yet)'; fi
    sed -n '12,22p' '${WS_TS}' | sed 's/^/  /'
  \""
  echo "For apply: --apply and PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=${VMID}"
  exit 0
fi

# pguard_* come from proxmox-production-guard.sh — mutation gate.
if ! pguard_require_apply_flag true; then
  echo "Refused: set PROXMOX_OPS_APPLY=1" >&2
  exit 3
fi
if ! pguard_vmid_allowed "$VMID"; then
  exit 3
fi

WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT
REMOTE_SH="${WORKDIR}/remote.sh"
# %q-quoted export first so the quoted heredoc stays expansion-free.
{
  printf 'export TARGET_TS=%q\n' "$WS_TS"
  cat <<'EOS'
set -euo pipefail
if [[ ! -f "$TARGET_TS" ]]; then
  echo "ERROR: missing $TARGET_TS" >&2
  exit 2
fi
# Idempotence marker: any perMessageDeflate reference means already handled.
if grep -q 'perMessageDeflate' "$TARGET_TS"; then
  echo "OK: perMessageDeflate already present"
  exit 0
fi
cp -a "$TARGET_TS" "${TARGET_TS}.bak.ws-deflate-$(date +%Y%m%d%H%M%S)"
python3 <<'PY'
from pathlib import Path
import os
p = Path(os.environ["TARGET_TS"])
t = p.read_text()
# NOTE(review): literal match — indentation must byte-match the deployed
# websocket.ts; confirm against the file on CT 7800.
old = """  const wss = new WebSocketServer({
    server: httpServer,
    path,
  })"""
new = """  const wss = new WebSocketServer({
    server: httpServer,
    path,
    perMessageDeflate: false,
  })"""
if old not in t:
    raise SystemExit("ERROR: WebSocketServer block not found or already modified")
p.write_text(t.replace(old, new, 1))
print("OK: patched WebSocketServer")
PY
systemctl restart sankofa-api.service
systemctl is-active sankofa-api.service
echo "OK: restarted sankofa-api"
EOS
} >"$REMOTE_SH"

ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH"

echo ""
echo "Next: pnpm exec node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs"
pguard_vmid_allowed "$VMID"; then + exit 3 +fi + +WORKDIR="$(mktemp -d)" +trap 'rm -rf "$WORKDIR"' EXIT +REMOTE_SH="${WORKDIR}/remote.sh" +{ + printf 'export TARGET_TS=%q\n' "$WS_TS" + cat <<'EOS' +set -euo pipefail +if [[ ! -f "$TARGET_TS" ]]; then + echo "ERROR: missing $TARGET_TS" >&2 + exit 2 +fi +if grep -q 'perMessageDeflate' "$TARGET_TS"; then + echo "OK: perMessageDeflate already present" + exit 0 +fi +cp -a "$TARGET_TS" "${TARGET_TS}.bak.ws-deflate-$(date +%Y%m%d%H%M%S)" +python3 <<'PY' +from pathlib import Path +import os +p = Path(os.environ["TARGET_TS"]) +t = p.read_text() +old = """ const wss = new WebSocketServer({ + server: httpServer, + path, + })""" +new = """ const wss = new WebSocketServer({ + server: httpServer, + path, + perMessageDeflate: false, + })""" +if old not in t: + raise SystemExit("ERROR: WebSocketServer block not found or already modified") +p.write_text(t.replace(old, new, 1)) +print("OK: patched WebSocketServer") +PY +systemctl restart sankofa-api.service +systemctl is-active sankofa-api.service +echo "OK: restarted sankofa-api" +EOS +} >"$REMOTE_SH" + +ssh $SSH_OPTS "root@${PROXMOX_HOST}" "pct exec ${VMID} -- bash -s" <"$REMOTE_SH" + +echo "" +echo "Next: pnpm exec node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs" diff --git a/scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh b/scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh new file mode 100755 index 00000000..4236d369 --- /dev/null +++ b/scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh @@ -0,0 +1,195 @@ +#!/usr/bin/env bash +# Install Tier-1 Phoenix API hub (nginx :8080) on an existing LXC. 
+# /graphql* → SANKOFA_API_HUB_UPSTREAM_PHOENIX (default 127.0.0.1:4000) +# /api*, /api-docs → SANKOFA_API_HUB_UPSTREAM_DBIS (default IP_DBIS_API:3000) +# +# Usage: +# ./scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh --dry-run --vmid 7800 +# PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 ./scripts/deployment/install-sankofa-api-hub-nginx-on-pve.sh --apply --vmid 7800 +# +# Requires: SSH root@PROXMOX_HOST; pct; Debian/Ubuntu CT (apt-get). Does not change NPM. +# Upstream Phoenix should be 127.0.0.1:4000 when Apollo binds loopback (see ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh). +# PROXMOX_HOST must be the PVE node that hosts this CT (where `pct exec` works), not the CT IP. +# +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/proxmox-production-guard.sh" + +PROXMOX_HOST="${PROXMOX_HOST:-${PROXMOX_HOST_R630_01:-192.168.11.11}}" +SSH_OPTS="-o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new" +APPLY=false +VMID="" +DRY_RUN=false + +while [[ $# -gt 0 ]]; do + case "$1" in + --apply) APPLY=true ;; + --dry-run) DRY_RUN=true ;; + --vmid) VMID="${2:?}"; shift ;; + *) echo "Unknown arg: $1" >&2; exit 2 ;; + esac + shift +done + +[[ -n "$VMID" ]] || { echo "ERROR: --vmid required (e.g. 7800)." >&2; exit 2; } + +PH_UP="${SANKOFA_API_HUB_UPSTREAM_PHOENIX:-127.0.0.1:4000}" +DB_UP="${SANKOFA_API_HUB_UPSTREAM_DBIS:-${IP_DBIS_API:-192.168.11.155}:3000}" + +echo "=== install-sankofa-api-hub-nginx-on-pve ===" +echo "PVE: root@${PROXMOX_HOST} VMID=${VMID}" +echo "Upstream Phoenix: ${PH_UP} dbis_core: ${DB_UP}" +if command -v get_host_for_vmid >/dev/null 2>&1; then + echo "get_host_for_vmid ${VMID}: $(get_host_for_vmid "${VMID}")" +fi +echo "" + +if $DRY_RUN || ! 
$APPLY; then + echo "[DRY-RUN] Would: ssh → pct push → pct exec (apt nginx, conf under /etc/sankofa-phoenix-api-hub/, systemd sankofa-phoenix-api-hub)." + $APPLY || true + echo "For live install: --apply + PROXMOX_OPS_APPLY=1 + PROXMOX_OPS_ALLOWED_VMIDS=${VMID}" + exit 0 +fi + +if ! pguard_require_apply_flag true; then + echo "Refused: set PROXMOX_OPS_APPLY=1" >&2 + exit 3 +fi +if ! pguard_vmid_allowed "$VMID"; then + exit 3 +fi + +WORKDIR="${TMPDIR:-/tmp}/sankofa-hub-pkg-$$" +mkdir -p "$WORKDIR" +cleanup() { rm -rf "$WORKDIR"; } +trap cleanup EXIT + +GEN="${WORKDIR}/site.conf" +MAIN="${WORKDIR}/nginx.conf" +UNIT="${WORKDIR}/sankofa-phoenix-api-hub.service" + +cat >"$GEN" <"$MAIN" <<'MAINEOF' +user www-data; +worker_processes auto; +error_log /var/log/nginx/sankofa-api-hub-error.log warn; +pid /tmp/sankofa-api-hub-nginx.pid; +events { worker_connections 1024; } +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + include /etc/sankofa-phoenix-api-hub/conf.d/*.conf; +} +MAINEOF + +cat >"$UNIT" <<'UNITEOF' +[Unit] +Description=Sankofa Phoenix API hub (nginx :8080) +After=network.target + +[Service] +Type=simple +ExecStartPre=/usr/sbin/nginx -t -c /etc/sankofa-phoenix-api-hub/nginx.conf +ExecStart=/usr/sbin/nginx -g "daemon off;" -c /etc/sankofa-phoenix-api-hub/nginx.conf +ExecReload=/usr/sbin/nginx -s reload -c /etc/sankofa-phoenix-api-hub/nginx.conf +Restart=on-failure +RestartSec=5 + +[Install] +WantedBy=multi-user.target +UNITEOF + +REMOTE="/tmp/sankofa-hub-ssh-$$" +ssh $SSH_OPTS "root@${PROXMOX_HOST}" "mkdir -p ${REMOTE}" +scp $SSH_OPTS "$GEN" "$MAIN" "$UNIT" "root@${PROXMOX_HOST}:${REMOTE}/" + +ssh $SSH_OPTS "root@${PROXMOX_HOST}" bash -s -- "$VMID" "$REMOTE" <<'REMOTE' +set -euo pipefail +VMID="$1" +REMOTE="$2" +pct push "${VMID}" "${REMOTE}/site.conf" /tmp/sankofa-hub-site.conf +pct push "${VMID}" "${REMOTE}/nginx.conf" /tmp/sankofa-hub-nginx-main.conf +pct push "${VMID}" 
"${REMOTE}/sankofa-phoenix-api-hub.service" /tmp/sankofa-phoenix-api-hub.service +rm -rf "${REMOTE}" +pct exec "${VMID}" -- bash -lc ' +set -euo pipefail +export DEBIAN_FRONTEND=noninteractive +apt-get update -qq +apt-get install -y -qq nginx +rm -f /etc/nginx/sites-enabled/default +mkdir -p /etc/sankofa-phoenix-api-hub/conf.d +install -m 0644 /tmp/sankofa-hub-site.conf /etc/sankofa-phoenix-api-hub/conf.d/site.conf +install -m 0644 /tmp/sankofa-hub-nginx-main.conf /etc/sankofa-phoenix-api-hub/nginx.conf +install -m 0644 /tmp/sankofa-phoenix-api-hub.service /etc/systemd/system/sankofa-phoenix-api-hub.service +rm -f /tmp/sankofa-hub-site.conf /tmp/sankofa-hub-nginx-main.conf /tmp/sankofa-phoenix-api-hub.service +nginx -t -c /etc/sankofa-phoenix-api-hub/nginx.conf +systemctl stop nginx 2>/dev/null || true +systemctl disable nginx 2>/dev/null || true +systemctl daemon-reload +systemctl enable sankofa-phoenix-api-hub +systemctl restart sankofa-phoenix-api-hub +systemctl is-active sankofa-phoenix-api-hub +' +REMOTE + +echo "" +echo "Smoke (Phoenix CT LAN IP, port 8080):" +echo " curl -sS http://${IP_SANKOFA_PHOENIX_API}:8080/health" +echo "Next: NPM maintenance — point phoenix.sankofa.nexus upstream to :8080 if desired." diff --git a/scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh b/scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh new file mode 100755 index 00000000..192ab9df --- /dev/null +++ b/scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env bash +# Read-only: show how Apollo :4000 is bound on Phoenix LXC (7800) and safe options to restrict it +# after NPM uses the Tier-1 hub on :8080 only. Does not change iptables or Phoenix config. 
+# +# Usage: +# bash scripts/deployment/plan-phoenix-apollo-port-4000-restrict-7800.sh [--ssh] +# --ssh run ss on CT via PVE (needs SSH root@PROXMOX_HOST + pct) +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" + +VMID="${PHOENIX_API_VMID:-7800}" +PROXMOX_HOST="${PROXMOX_HOST:-$(get_host_for_vmid "$VMID")}" +SSH=false +[[ "${1:-}" == "--ssh" ]] && SSH=true + +echo "=== plan-phoenix-apollo-port-4000-restrict-7800 (read-only) ===" +echo "VMID=${VMID} PVE=${PROXMOX_HOST}" +echo "" +echo "Goal: after NPM → :8080 hub, avoid LAN-wide exposure of raw Apollo :4000 (bypass hub CORS/WAF intent)." +echo "" +echo "1) **Bind Apollo to loopback** (preferred — Fastify uses HOST from env):" +echo " Run: PROXMOX_OPS_APPLY=1 PROXMOX_OPS_ALLOWED_VMIDS=7800 \\" +echo " bash scripts/deployment/ensure-sankofa-phoenix-apollo-bind-loopback-7800.sh --apply --vmid 7800" +echo " Hub upstream must stay server 127.0.0.1:4000; restart order: .env then sankofa-api.service." +echo "" +echo "2) **Host firewall on CT ${VMID}** (nftables/iptables): allow TCP :4000 only from 127.0.0.1 (and" +echo " same-container processes). Drop from 192.168.11.0/24. Add an explicit operator allowlist" +echo " for break-glass IPs if needed. Dry-run with nft -c; apply in a maintenance window." +echo "" +echo "3) Document the chosen option in docs/04-configuration/ALL_VMIDS_ENDPOINTS.md." +echo "" + +if $SSH; then + echo "--- Live listeners (pct exec ${VMID}):" + # shellcheck disable=SC2029 + ssh -o BatchMode=yes -o ConnectTimeout=12 "root@${PROXMOX_HOST}" \ + "pct exec ${VMID} -- bash -lc \"command -v ss >/dev/null && ss -tlnp | grep -E ':4000|:8080' || ( netstat -tlnp 2>/dev/null | grep -E ':4000|:8080' ) || echo '(ss/netstat unavailable)'\"" \ + || echo "SSH/pct failed (skip if not on LAN)." 
+else + echo "Re-run with --ssh on LAN to print :4000 / :8080 listeners from the CT." +fi diff --git a/scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh b/scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh new file mode 100755 index 00000000..9ab5a786 --- /dev/null +++ b/scripts/deployment/plan-sankofa-consolidated-hub-cutover.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Read-only plan: consolidated web hub + API hub cutover reminders. +# Does not SSH, mutate NPM, or change Proxmox. Load dotenv when available. +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck disable=SC1090 +[[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]] && source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" || true + +echo "=== Sankofa consolidated hub — operator plan (dry text only) ===" +echo "" +echo "1) Validate example nginx syntax on operator workstation or CI:" +echo " bash scripts/verify/check-sankofa-consolidated-nginx-examples.sh" +echo "" +echo "2) After provisioning hub LXCs, set in repo .env (overrides ip-addresses defaults):" +echo " IP_SANKOFA_WEB_HUB=" +echo " SANKOFA_WEB_HUB_PORT=80" +echo " IP_SANKOFA_PHOENIX_API_HUB=" +echo " SANKOFA_PHOENIX_API_HUB_PORT=8080 # example when nginx listens for NPM" +echo "" +echo "3) Install configs on hub CT (paths match systemd examples):" +echo " /etc/sankofa-web-hub/nginx.conf ← config/nginx/sankofa-hub-main.example.conf" +echo " /etc/sankofa-web-hub/conf.d/site.conf ← sankofa-non-chain-frontends.example.conf (tuned)" +echo " /etc/sankofa-phoenix-api-hub/nginx.conf + conf.d/ ← sankofa-api-hub-main + phoenix-api-hub" +echo "" +echo "4) Point upstream blocks in API hub nginx to real Phoenix (:${SANKOFA_PHOENIX_API_PORT:-4000}) and dbis_core (:3000 or your LAN)." 
+echo "" +echo "5) NPM: point affected FQDNs to IP_SANKOFA_WEB_HUB; for phoenix.sankofa.nexus set SANKOFA_NPM_PHOENIX_PORT=8080 (and IP_SANKOFA_NPM_PHOENIX_API if hub IP differs) then run scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh" +echo " Full sequence: docs/03-deployment/SANKOFA_API_HUB_NPM_CUTOVER_AND_POST_CUTOVER_RUNBOOK.md" +echo "" +echo "6) Update docs/04-configuration/ALL_VMIDS_ENDPOINTS.md and get_host_for_vmid when VMIDs are retired." +echo "" +echo "Current resolved defaults (from config):" +echo " IP_SANKOFA_WEB_HUB=${IP_SANKOFA_WEB_HUB:-unset}" +echo " IP_SANKOFA_PHOENIX_API_HUB=${IP_SANKOFA_PHOENIX_API_HUB:-unset}" +echo "" +echo "Architecture: docs/02-architecture/SANKOFA_PHOENIX_CONSOLIDATED_FRONTEND_AND_API.md" +echo "r630-01 goal (phases + placement): docs/03-deployment/SANKOFA_R630_01_CONSOLIDATION_AND_HUB_PLACEMENT_GOAL.md" diff --git a/scripts/env.r630-01.example b/scripts/env.r630-01.example index 01ffa7ea..425e7912 100644 --- a/scripts/env.r630-01.example +++ b/scripts/env.r630-01.example @@ -47,6 +47,8 @@ KEYCLOAK_MULTI_REALM=false # API Configuration API_HOST=192.168.11.50 API_PORT=4000 +# When Tier-1 hub terminates public GraphQL, bind Fastify to loopback on the Phoenix CT (7800): +# HOST=127.0.0.1 NEXT_PUBLIC_GRAPHQL_ENDPOINT=http://192.168.11.50:4000/graphql NEXT_PUBLIC_GRAPHQL_WS_ENDPOINT=ws://192.168.11.50:4000/graphql-ws JWT_SECRET=CHANGE_THIS_JWT_SECRET_IN_PRODUCTION diff --git a/scripts/lib/load-project-env.sh b/scripts/lib/load-project-env.sh index fa21893b..e144d797 100644 --- a/scripts/lib/load-project-env.sh +++ b/scripts/lib/load-project-env.sh @@ -5,7 +5,7 @@ # Usage: source "${SCRIPT_DIR}/lib/load-project-env.sh" # # Env precedence (first wins): 1) .env 2) config/ip-addresses.conf 3) smom-dbis-138/.env 4) dbis_core config -# Version: 2026-04-12 (get_host_for_vmid aligned with 2026-04-09 live Chain 138 placement) +# Version: 2026-04-13 (get_host_for_vmid: explicit Sankofa 7800–7806 on r630-01) [[ -n 
"${PROJECT_ROOT:-}" ]] || PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" export PROJECT_ROOT @@ -128,12 +128,13 @@ export SMOM_DIR="${SMOM_DBIS_138_DIR:-${PROJECT_ROOT}/smom-dbis-138}" export DBIS_CORE_DIR="${DBIS_CORE_DIR:-${PROJECT_ROOT}/dbis_core}" # VMID -> Proxmox host (for pct/qm operations) -# Covers: DBIS (101xx), RPC (2101-2103, 2201, 2301, etc.), Blockscout (5000), CCIP (5400-5476), NPMplus (10233, 10234) +# Covers: DBIS (101xx), RPC (2101-2103, 2201, 2301, etc.), Blockscout (5000), CCIP (5400-5476), NPMplus (10233, 10234), Sankofa stack (7800–7806) # Live placement (2026-04-09): validators 1003/1004, sentries 1503-1510, and RPCs 2102, 2301, 2304, 2400, 2402, 2403 on r630-03; # RPCs 2201, 2303, 2305-2308, 2401 on r630-02; 2101 + 2103 remain on r630-01 — see ALL_VMIDS_ENDPOINTS.md get_host_for_vmid() { local vmid="$1" case "$vmid" in + 7800|7801|7802|7803|7804|7805|7806) echo "${PROXMOX_HOST_R630_01}";; 10130|10150|10151|106|107|108|10000|10001|10020|10100|10101|10120|10203|10233|10235) echo "${PROXMOX_HOST_R630_01}";; 1000|1001|1002|1500|1501|1502|2101|2103) echo "${PROXMOX_HOST_R630_01}";; 1003|1004|1503|1504|1505|1506|1507|1508|1509|1510|2102|2301|2304|2400|2402|2403) echo "${PROXMOX_HOST_R630_03}";; diff --git a/scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh b/scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh index e0fcce73..188f1dcc 100755 --- a/scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh +++ b/scripts/nginx-proxy-manager/update-npmplus-proxy-hosts-api.sh @@ -468,6 +468,10 @@ IP_SANKOFA_PORTAL="${IP_SANKOFA_PORTAL:-${IP_SERVICE_51:-192.168.11.51}}" IP_SANKOFA_PHOENIX_API="${IP_SANKOFA_PHOENIX_API:-${IP_SERVICE_50:-192.168.11.50}}" SANKOFA_PORTAL_PORT="${SANKOFA_PORTAL_PORT:-3000}" SANKOFA_PHOENIX_API_PORT="${SANKOFA_PHOENIX_API_PORT:-4000}" +# NPM upstream for phoenix.sankofa.nexus (default same IP as Phoenix CT, port SANKOFA_PHOENIX_API_PORT, usually :4000). 
+# Set SANKOFA_NPM_PHOENIX_PORT=8080 when NPM should hit Tier-1 API hub nginx. Optional IP_SANKOFA_NPM_PHOENIX_API if hub is on another CT. +IP_SANKOFA_NPM_PHOENIX_API="${IP_SANKOFA_NPM_PHOENIX_API:-${IP_SANKOFA_PHOENIX_API}}" +SANKOFA_NPM_PHOENIX_PORT="${SANKOFA_NPM_PHOENIX_PORT:-${SANKOFA_PHOENIX_API_PORT}}" # Resolved before portal/SSO rows so dash can default to client SSO stack IP_SANKOFA_CLIENT_SSO="${IP_SANKOFA_CLIENT_SSO:-${IP_SANKOFA_PORTAL}}" SANKOFA_CLIENT_SSO_PORT="${SANKOFA_CLIENT_SSO_PORT:-${SANKOFA_PORTAL_PORT}}" @@ -475,8 +479,9 @@ IP_SANKOFA_PUBLIC_WEB="${IP_SANKOFA_PUBLIC_WEB:-${IP_SANKOFA_PORTAL}}" SANKOFA_PUBLIC_WEB_PORT="${SANKOFA_PUBLIC_WEB_PORT:-${SANKOFA_PORTAL_PORT}}" update_proxy_host "sankofa.nexus" "http://${IP_SANKOFA_PUBLIC_WEB}:${SANKOFA_PUBLIC_WEB_PORT}" false false && updated_count=$((updated_count + 1)) || { add_proxy_host "sankofa.nexus" "${IP_SANKOFA_PUBLIC_WEB}" "${SANKOFA_PUBLIC_WEB_PORT}" false false && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) update_proxy_host "www.sankofa.nexus" "http://${IP_SANKOFA_PUBLIC_WEB}:${SANKOFA_PUBLIC_WEB_PORT}" false false "https://sankofa.nexus" && updated_count=$((updated_count + 1)) || { add_proxy_host "www.sankofa.nexus" "${IP_SANKOFA_PUBLIC_WEB}" "${SANKOFA_PUBLIC_WEB_PORT}" false false "https://sankofa.nexus" && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) -update_proxy_host "phoenix.sankofa.nexus" "http://${IP_SANKOFA_PHOENIX_API}:${SANKOFA_PHOENIX_API_PORT}" false false && updated_count=$((updated_count + 1)) || { add_proxy_host "phoenix.sankofa.nexus" "${IP_SANKOFA_PHOENIX_API}" "${SANKOFA_PHOENIX_API_PORT}" false false && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) -update_proxy_host "www.phoenix.sankofa.nexus" "http://${IP_SANKOFA_PHOENIX_API}:${SANKOFA_PHOENIX_API_PORT}" false false "https://phoenix.sankofa.nexus" && updated_count=$((updated_count + 1)) || { add_proxy_host 
"www.phoenix.sankofa.nexus" "${IP_SANKOFA_PHOENIX_API}" "${SANKOFA_PHOENIX_API_PORT}" false false "https://phoenix.sankofa.nexus" && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) +# WebSocket true — GraphQL subscriptions (/graphql-ws) via hub or direct Apollo +update_proxy_host "phoenix.sankofa.nexus" "http://${IP_SANKOFA_NPM_PHOENIX_API}:${SANKOFA_NPM_PHOENIX_PORT}" true false && updated_count=$((updated_count + 1)) || { add_proxy_host "phoenix.sankofa.nexus" "${IP_SANKOFA_NPM_PHOENIX_API}" "${SANKOFA_NPM_PHOENIX_PORT}" true false && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) +update_proxy_host "www.phoenix.sankofa.nexus" "http://${IP_SANKOFA_NPM_PHOENIX_API}:${SANKOFA_NPM_PHOENIX_PORT}" true false "https://phoenix.sankofa.nexus" && updated_count=$((updated_count + 1)) || { add_proxy_host "www.phoenix.sankofa.nexus" "${IP_SANKOFA_NPM_PHOENIX_API}" "${SANKOFA_NPM_PHOENIX_PORT}" true false "https://phoenix.sankofa.nexus" && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) # Keycloak (CT 7802) — portal SSO; NPM must forward X-Forwarded-* (Keycloak KC_PROXY_HEADERS=xforwarded on upstream) IP_KEYCLOAK="${IP_KEYCLOAK:-192.168.11.52}" update_proxy_host "keycloak.sankofa.nexus" "http://${IP_KEYCLOAK}:8080" false false && updated_count=$((updated_count + 1)) || { add_proxy_host "keycloak.sankofa.nexus" "${IP_KEYCLOAK}" 8080 false false && updated_count=$((updated_count + 1)); } || failed_count=$((failed_count + 1)) diff --git a/scripts/verify/check-sankofa-consolidated-nginx-examples.sh b/scripts/verify/check-sankofa-consolidated-nginx-examples.sh new file mode 100755 index 00000000..b3aa1139 --- /dev/null +++ b/scripts/verify/check-sankofa-consolidated-nginx-examples.sh @@ -0,0 +1,54 @@ +#!/usr/bin/env bash +# Validate example nginx configs for Sankofa consolidated web/API hub (syntax only). +# Read-only; no mutations. 
Uses host `nginx -t` when available, else Docker `nginx:1.27-alpine`. +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +NGINX_DIR="${PROJECT_ROOT}/config/nginx" +TMPDIR="${TMPDIR:-/tmp}" +WRAP="${TMPDIR}/sankofa-nginx-test-$$.conf" + +cleanup() { + rm -f "$WRAP" "${TMPDIR}/sankofa-nginx-wrap-docker-"*.conf 2>/dev/null || true + rm -f "${TMPDIR}/sankofa-nginx-test-$$"/*.conf 2>/dev/null || true + rmdir "${TMPDIR}/sankofa-nginx-test-$$" 2>/dev/null || true +} +trap cleanup EXIT + +mkdir -p "${TMPDIR}/sankofa-nginx-test-$$" +cp "${NGINX_DIR}/sankofa-non-chain-frontends.example.conf" "${TMPDIR}/sankofa-nginx-test-$$/01-web.conf" +cp "${NGINX_DIR}/sankofa-phoenix-api-hub.example.conf" "${TMPDIR}/sankofa-nginx-test-$$/02-api.conf" + +cat >"$WRAP" <<'EOF' +events { worker_connections 1024; } +http { + include __INCLUDE_DIR__/*.conf; +} +EOF +sed -i "s|__INCLUDE_DIR__|${TMPDIR}/sankofa-nginx-test-$$|g" "$WRAP" + +if command -v nginx >/dev/null 2>&1; then + echo "== nginx -t (host binary, wrapped includes) ==" + nginx -t -c "$WRAP" +elif command -v docker >/dev/null 2>&1; then + DOCKER_WRAP="${TMPDIR}/sankofa-nginx-wrap-docker-$$.conf" + cat >"$DOCKER_WRAP" <<'INNER' +events { worker_connections 1024; } +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + include /tmp/inc/*.conf; +} +INNER + echo "== nginx -t (docker nginx:1.27-alpine) ==" + docker run --rm \ + -v "$DOCKER_WRAP:/etc/nginx/nginx.conf:ro" \ + -v "${TMPDIR}/sankofa-nginx-test-$$:/tmp/inc:ro" \ + nginx:1.27-alpine \ + nginx -t +else + echo "SKIP: need host nginx or docker to run syntax check." + exit 0 +fi + +echo "OK: example configs parse." 
diff --git a/scripts/verify/smoke-phoenix-api-hub-lan.sh b/scripts/verify/smoke-phoenix-api-hub-lan.sh
new file mode 100755
index 00000000..220122bd
--- /dev/null
+++ b/scripts/verify/smoke-phoenix-api-hub-lan.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Read-only LAN smoke: Tier-1 Phoenix API hub (:8080) — health + GraphQL + proxied api-docs.
+# Usage: bash scripts/verify/smoke-phoenix-api-hub-lan.sh
+# Env: IP_SANKOFA_PHOENIX_API, SANKOFA_API_HUB_LISTEN_PORT (default 8080) from load-project-env / ip-addresses.
+set -euo pipefail
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+# shellcheck source=/dev/null
+source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh"
+
+HUB_IP="${IP_SANKOFA_PHOENIX_API:-192.168.11.50}"
+# Tier-1 nginx hub listens on :8080 by default (not SANKOFA_PHOENIX_API_HUB_PORT, which often tracks Apollo :4000).
+HUB_PORT="${SANKOFA_API_HUB_LISTEN_PORT:-8080}"
+BASE="http://${HUB_IP}:${HUB_PORT}"
+
+echo "=== smoke-phoenix-api-hub-lan ==="
+echo "Base: ${BASE}"
+echo ""
+
+curl -fsS -m 8 "${BASE}/health" | head -c 200
+echo ""
+echo "--- GraphQL POST /graphql"
+curl -fsS -m 12 "${BASE}/graphql" \
+  -H 'Content-Type: application/json' \
+  -d '{"query":"query { __typename }"}' | head -c 300
+echo ""
+echo "--- GET /api-docs (optional)"
+_ad="/tmp/hub-api-docs-$$"
+code="$(curl -sS -m 12 -o "$_ad" -w "%{http_code}" "${BASE}/api-docs" || echo 000)"
+if [[ "$code" == "200" ]]; then head -c 120 "$_ad"; echo ""; else echo "HTTP ${code} (hub still OK if GraphQL passed)"; fi
+rm -f "$_ad"
+echo ""
+echo "OK: hub smoke passed."
diff --git a/scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs b/scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs new file mode 100644 index 00000000..ddaa9efc --- /dev/null +++ b/scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs @@ -0,0 +1,68 @@ +#!/usr/bin/env node +/** + * Optional: full graphql-ws handshake — connection_init → connection_ack over wss:// + * Server must expose a single clean upgrade path (standalone `ws` + graphql-ws; remove unused + * `@fastify/websocket` on CT 7800 if clients see RSV1 — see ensure-sankofa-phoenix-graphql-ws-remove-fastify-websocket-7800.sh). + * + * Usage: + * node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs + * PHOENIX_GRAPHQL_WSS_URL=wss://host/graphql-ws node scripts/verify/smoke-phoenix-graphql-ws-subscription.mjs + */ +import { createRequire } from 'node:module'; +import { fileURLToPath } from 'node:url'; +import path from 'node:path'; + +const require = createRequire(import.meta.url); +const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..'); +const WebSocket = require(path.join(repoRoot, 'node_modules', 'ws')); + +const url = process.env.PHOENIX_GRAPHQL_WSS_URL || 'wss://phoenix.sankofa.nexus/graphql-ws'; +const timeoutMs = Number(process.env.PHOENIX_WS_SUB_TIMEOUT_MS || 15000); + +const ws = new WebSocket(url, ['graphql-transport-ws'], { perMessageDeflate: false }); + +const timer = setTimeout(() => { + console.error('TIMEOUT waiting for connection_ack'); + ws.terminate(); + process.exit(1); +}, timeoutMs); + +ws.on('open', () => { + ws.send(JSON.stringify({ type: 'connection_init' })); +}); + +ws.on('message', (data) => { + const text = String(data); + let msg; + try { + msg = JSON.parse(text); + } catch { + console.error('Non-JSON message:', text.slice(0, 200)); + return; + } + if (msg.type === 'connection_ack') { + clearTimeout(timer); + console.log('OK: graphql-ws connection_ack'); + ws.close(1000, 'smoke-ok'); + process.exit(0); + } + if (msg.type 
=== 'ping') { + ws.send(JSON.stringify({ type: 'pong' })); + return; + } + console.log('msg:', msg.type, JSON.stringify(msg).slice(0, 200)); +}); + +ws.on('error', (err) => { + clearTimeout(timer); + console.error('WebSocket error:', err.message); + process.exit(2); +}); + +ws.on('close', (code, reason) => { + clearTimeout(timer); + if (code !== 1000) { + console.error('Closed:', code, String(reason)); + process.exit(3); + } +}); diff --git a/scripts/verify/smoke-phoenix-graphql-wss-public.sh b/scripts/verify/smoke-phoenix-graphql-wss-public.sh new file mode 100755 index 00000000..95868309 --- /dev/null +++ b/scripts/verify/smoke-phoenix-graphql-wss-public.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Smoke: WebSocket upgrade to Phoenix GraphQL WS (graphql-transport-ws) end-to-end through TLS. +# Uses curl --http1.1 (HTTP/2 cannot complete WS upgrade on many edges). Expects HTTP 101. +# Each successful probe waits up to PHOENIX_WSS_CURL_MAXTIME seconds (default 8): curl has no EOF on WS. +# +# Usage: +# bash scripts/verify/smoke-phoenix-graphql-wss-public.sh +# PHOENIX_GRAPHQL_WSS_URL=wss://phoenix.example/graphql-ws bash scripts/verify/smoke-phoenix-graphql-wss-public.sh +# Optional LAN hub (no TLS): +# PHOENIX_GRAPHQL_WS_LAN=http://192.168.11.50:8080/graphql-ws PHOENIX_WS_HOST_HEADER=phoenix.sankofa.nexus \ +# bash scripts/verify/smoke-phoenix-graphql-wss-public.sh +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true + +PUBLIC_WSS="${PHOENIX_GRAPHQL_WSS_URL:-https://phoenix.sankofa.nexus/graphql-ws}" +LAN_WS="${PHOENIX_GRAPHQL_WS_LAN:-}" +LAN_HOST="${PHOENIX_WS_HOST_HEADER:-phoenix.sankofa.nexus}" +# After HTTP 101, curl waits for the WebSocket stream until --max-time (no clean EOF). 
Keep this +# modest so LAN+public probes do not sit for minutes (override with PHOENIX_WSS_CURL_MAXTIME if needed). +CURL_MAXTIME="${PHOENIX_WSS_CURL_MAXTIME:-8}" +# Opt-in LAN hub probe (same CT, HTTP): PHOENIX_WSS_INCLUDE_LAN=1 with load-project-env / ip-addresses +if [[ "${PHOENIX_WSS_INCLUDE_LAN:-0}" == "1" && -z "$LAN_WS" && -n "${IP_SANKOFA_PHOENIX_API:-}" ]]; then + LAN_WS="http://${IP_SANKOFA_PHOENIX_API}:8080/graphql-ws" +fi + +probe_upgrade() { + local name="$1" + local url="$2" + shift 2 + echo "--- ${name}" + echo " URL: ${url}" + local first + first="$(curl --http1.1 -sS --connect-timeout 10 -m "${CURL_MAXTIME}" -D- -o /dev/null \ + "$@" \ + -H 'Connection: Upgrade' \ + -H 'Upgrade: websocket' \ + -H 'Sec-WebSocket-Version: 13' \ + -H 'Sec-WebSocket-Key: dGhlIHNhbXBsZSBub25jZQ==' \ + -H 'Sec-WebSocket-Protocol: graphql-transport-ws' \ + "$url" 2>&1 | head -1 | tr -d '\r')" + if [[ "$first" == *"101"* ]]; then + echo " OK (${first})" + return 0 + fi + echo " FAIL (expected HTTP/1.1 101; first line: ${first})" + return 1 +} + +echo "=== smoke-phoenix-graphql-wss-public (curl WS upgrade) ===" +fail=0 +probe_upgrade "Public WSS (NPM → hub → Apollo)" "$PUBLIC_WSS" || fail=1 + +if [[ -n "$LAN_WS" ]]; then + probe_upgrade "LAN hub (optional)" "$LAN_WS" -H "Host: ${LAN_HOST}" || fail=1 +fi + +if [[ "$fail" -ne 0 ]]; then + echo "" + echo "RESULT: one or more upgrade probes failed." + exit 1 +fi +echo "" +echo "RESULT: WebSocket upgrade path OK (HTTP 101). Full handshake: pnpm run verify:phoenix-graphql-ws-subscription" diff --git a/scripts/verify/verify-sankofa-consolidated-hub-lan.sh b/scripts/verify/verify-sankofa-consolidated-hub-lan.sh new file mode 100755 index 00000000..7f34d839 --- /dev/null +++ b/scripts/verify/verify-sankofa-consolidated-hub-lan.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# Read-only LAN checks for Sankofa Phoenix + dbis_core + optional Keycloak / corporate web. 
+# Exit 0 if all probes that are attempted succeed; exit 1 if any required probe fails. +# Phoenix: prefers Tier-1 hub :8080 /health when hub is up (Apollo may be 127.0.0.1:4000 only). +# Optional: SANKOFA_VERIFY_PHOENIX_DIRECT_PORT=1 to probe direct :4000 (fails when loopback-bound). +set -euo pipefail +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" + +fail=0 +probe() { + local name="$1" url="$2" + echo "--- ${name}: ${url}" + if curl -fsS -m 8 -o /dev/null -w " HTTP %{http_code}\n" "$url"; then + return 0 + fi + echo " FAIL" + fail=1 +} + +echo "=== Sankofa / Phoenix / DBIS LAN readiness (read-only) ===" +echo "" + +# Prefer Tier-1 hub :8080 when present (Apollo may bind loopback :4000 only). +PH_HUB_HEALTH="http://${IP_SANKOFA_PHOENIX_API}:${SANKOFA_API_HUB_LISTEN_PORT:-8080}/health" +PH_DIRECT_HEALTH="http://${IP_SANKOFA_PHOENIX_API}:${SANKOFA_PHOENIX_API_PORT}/health" +if curl -fsS -m 6 "$PH_HUB_HEALTH" -o /dev/null 2>/dev/null; then + probe "Phoenix API hub /health" "$PH_HUB_HEALTH" +else + probe "Phoenix API /health (direct Apollo)" "$PH_DIRECT_HEALTH" +fi +if [[ "${SANKOFA_VERIFY_PHOENIX_DIRECT_PORT:-0}" == "1" ]]; then + probe "Phoenix API /health (direct :${SANKOFA_PHOENIX_API_PORT}, optional)" "$PH_DIRECT_HEALTH" || true +fi +probe "Portal /" "http://${IP_SANKOFA_PORTAL}:${SANKOFA_PORTAL_PORT}/" + +if [[ -n "${IP_SANKOFA_PUBLIC_WEB:-}" ]]; then + probe "Corporate web /" "http://${IP_SANKOFA_PUBLIC_WEB}:${SANKOFA_PUBLIC_WEB_PORT:-3000}/" || true +fi + +if [[ -n "${IP_DBIS_API:-}" ]]; then + probe "dbis_core /health" "http://${IP_DBIS_API}:3000/health" || true +fi + +# Keycloak (7802 typical) — optional +if [[ -n "${IP_KEYCLOAK:-}" ]] || [[ -n "${KEYCLOAK_URL:-}" ]]; then + _kc="${IP_KEYCLOAK:-}" + if [[ -z "$_kc" && "${KEYCLOAK_URL:-}" =~ http://([^:/]+) ]]; then + _kc="${BASH_REMATCH[1]}" + fi + if [[ -n 
"$_kc" ]]; then + echo "--- Keycloak (optional): realm metadata" + curl -fsS -m 8 -o /dev/null -w " HTTP %{http_code} http://${_kc}:8080/realms/master\n" "http://${_kc}:8080/realms/master" \ + || echo " SKIP (unreachable or wrong path)" + fi +fi + +echo "" +echo "Resolved hub env (for NPM / nginx cutover):" +echo " IP_SANKOFA_WEB_HUB=${IP_SANKOFA_WEB_HUB:-} port ${SANKOFA_WEB_HUB_PORT:-}" +echo " IP_SANKOFA_PHOENIX_API_HUB=${IP_SANKOFA_PHOENIX_API_HUB:-} port ${SANKOFA_PHOENIX_API_HUB_PORT:-}" +echo "" + +echo "--- Phoenix Tier-1 API hub (informational, :8080)" +HUB_LAN="http://${IP_SANKOFA_PHOENIX_API}:${SANKOFA_API_HUB_LISTEN_PORT:-8080}" +if curl -fsS -m 6 "${HUB_LAN}/health" -o /dev/null 2>/dev/null; then + echo " OK ${HUB_LAN}/health (also covered by required probe above when hub is primary)" +else + echo " SKIP (no hub on ${HUB_LAN} — install: install-sankofa-api-hub-nginx-on-pve.sh)" +fi +echo "" + +if [[ "$fail" -ne 0 ]]; then + echo "RESULT: one or more probes failed." + exit 1 +fi +echo "RESULT: required probes OK." +exit 0