diff --git a/.gitignore b/.gitignore index 4babe24..0045c58 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,9 @@ docs/04-configuration/coingecko/logos/*.png # Ephemeral phase markers .phase1-event-status +# DBIS Phase 1 discovery — timestamped reports (run scripts/verify/run-phase1-discovery.sh) +reports/phase1-discovery/phase1-discovery-*.md + # OMNL operator rail (env-specific IDs, reconciliation, audit packets, posted refs) ids.env reconciliation/ diff --git a/dbis_chain_138_technical_master_plan.md b/dbis_chain_138_technical_master_plan.md new file mode 100644 index 0000000..3ef9ac8 --- /dev/null +++ b/dbis_chain_138_technical_master_plan.md @@ -0,0 +1,461 @@ +# DBIS Chain 138 Technical Master Plan + +## Purpose +This document is the governance and execution baseline for DBIS Chain 138 infrastructure. It is intentionally grounded in repo-backed and operator-verified reality, so it can be used for audits, deployment planning, and readiness decisions without confusing `currently deployed`, `under validation`, and `future-state` work. + +The objective is to move from architecture theory to a production-grade sovereign deployment program that is evidence-based, phased, and operationally auditable. + +--- + +# SECTION 1 — MASTER OBJECTIVES + +## Primary objectives + +1. Inventory currently installed stack components and host placement. +2. Validate actual service readiness, not just declared architecture. +3. Standardize Proxmox VE deployment topology and preferred workload placement. +4. Assign infrastructure ownership across ecosystem entities once governance is finalized. +5. Define production-grade deployment and verification workflows. +6. Track the gap between today’s footprint and sovereign target-state architecture. +7. Produce auditable artifacts that operators can regenerate and maintain. 
+ +--- + +# SECTION 2 — CURRENT STACK STATUS + +## Deployed now + +- Hyperledger Besu (QBFT, Chain 138) +- Hyperledger Fabric containers and VMIDs are allocated +- Hyperledger Indy containers and VMIDs are allocated +- Hyperledger FireFly primary container footprint exists +- Blockscout / explorer stack +- Hyperledger Caliper hook and performance guidance (documentation only) + +## Partially deployed / under validation + +- Hyperledger FireFly: + - primary `6200` is restored as a minimal local FireFly API footprint + - secondary `6201` is present as a stopped container but currently behaves like standby / incomplete deployment +- Hyperledger Fabric: + - `6000`, `6001`, `6002` are present and running at the CT layer + - current app-level verification does not yet show active Fabric peer / orderer workloads inside those CTs +- Hyperledger Indy: + - `6400`, `6401`, `6402` are present and running at the CT layer + - current app-level verification does not yet show active Indy node listeners on the expected ports + +## Planned / aspirational + +- Hyperledger Aries as a proven deployed service tier +- Hyperledger AnonCreds as an operationally verified deployed layer +- Hyperledger Ursa as a required runtime dependency +- Hyperledger Quilt +- Hyperledger Avalon +- Hyperledger Cacti as a proven live interoperability layer +- Full multi-region sovereignized Proxmox with Ceph-backed storage and segmented production VLANs + +--- + +# SECTION 3 — CURRENT ENVIRONMENT DISCOVERY + +## Canonical discovery artifacts + +The source-of-truth discovery path for current state is: + +- [docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md](docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md) +- [docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md) +- [docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md](docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md) +- [scripts/verify/run-phase1-discovery.sh](scripts/verify/run-phase1-discovery.sh) +- 
[config/proxmox-operational-template.json](config/proxmox-operational-template.json) +- [docs/04-configuration/ALL_VMIDS_ENDPOINTS.md](docs/04-configuration/ALL_VMIDS_ENDPOINTS.md) +- [docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md](docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md) + +## Discovery scope + +Reality mapping must validate: + +1. Proxmox hosts and cluster health +2. VMID / CT inventory versus template JSON +3. Besu validators, sentries, and RPC tiers +4. Explorer and public RPC availability +5. Hyperledger CT presence and app-level readiness where possible +6. Storage topology and current backing stores +7. Network topology and current LAN / VLAN reality +8. ML110 role reality versus migration plan + +## Required outputs + +Every discovery run should produce: + +- Infrastructure inventory report +- Service state map +- Dependency context +- Critical failure summary + +The markdown report is evidence capture; the script exit code is the pass/fail signal. + +--- + +# SECTION 4 — PROXMOX VE DEPLOYMENT DESIGN + +## Current state + +- Current cluster footprint is smaller than the target sovereign model. +- Current storage is primarily local ZFS / LVM-based, not Ceph-backed distributed storage. +- Current workload placement is represented as `preferred host` in the planning template, not guaranteed live placement. + +## Target model + +- Multi-node Proxmox VE cluster with stable quorum +- HA-aware workload placement +- Dedicated roles for core compute, RPC exposure, identity/workflow DLT, ingress, and future storage tiers + +## Current interpretation rule + +This plan must not describe the target sovereignized Proxmox model as already achieved. All references to HA, Ceph, dedicated storage nodes, or dedicated network nodes are roadmap items unless Phase 1 evidence proves they are already active. 
+ +--- + +# SECTION 5 — NETWORK ARCHITECTURE + +## Current network reality + +- Primary active management / services LAN is `192.168.11.0/24` +- Public ingress is fronted through NPMplus / edge services +- RPC exposure is already tiered across core, public, private, named, and thirdweb-facing nodes + +## Target network layers + +1. Management network +2. Storage replication network +3. Blockchain validator / P2P network +4. Identity / workflow DLT network +5. Public access / DMZ network +6. Validator-only restricted paths + +## Status + +- Public access and RPC role separation exist in practice. +- Full sovereign segmentation with dedicated VLANs and zero-trust internal routing remains roadmap work. + +--- + +# SECTION 6 — ENTITY ASSIGNMENT MODEL + +## Governance model + +The entity-assignment model remains valid as a target governance structure: + +- DBIS Core Authority +- Central Banks +- International Financial Institutions +- Regional Operators + +## Current status + +- Entity ownership for many deployed workloads is still `TBD` in the operational matrix. +- Until governance assigns final owners, operator documentation must keep those fields explicitly marked as `TBD` rather than inventing ownership. 
+ +The executable placement artifact is: + +- [docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md](docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md) + +--- + +# SECTION 7 — VM AND CONTAINER DESIGN + +## Current status by workload family + +### Deployed now + +- Settlement / Besu VM family +- Explorer / observability family +- Ingress / proxy family +- Application and DBIS-support workloads + +### Partially deployed / under validation + +- Workflow VM / CT family for FireFly +- Institutional VM / CT family for Fabric +- Identity VM / CT family for Indy + +### Planned / aspirational + +- Identity VM template that includes proven Aries + AnonCreds runtime +- Interoperability VM template for true Hyperledger Cacti usage + +## Implementation rule + +Template language in this plan must map to actual repo artifacts and actual VMIDs, not hypothetical inventory. + +--- + +# SECTION 8 — STORAGE ARCHITECTURE + +## Current state + +- Current guest storage is backed by local Proxmox storage pools. +- Ceph-backed distributed storage is not yet an achieved platform baseline. + +## Target state + +- Ceph or equivalent distributed storage tier +- Snapshot-aware backup strategy by workload class +- Archive and audit retention policy + +## Roadmap artifact + +- [docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md](docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md) + +--- + +# SECTION 9 — SECURITY ARCHITECTURE + +## Current baseline + +- Chain 138 validator, sentry, and RPC tiering exists as an operational pattern. +- Public RPC capability validation is now script-backed. +- Explorer and wallet metadata are now explicitly documented and verifiable. + +## Target-state controls + +- HSM-backed key management +- stronger secrets segregation +- certificate hierarchy and operator MFA +- formalized tier-to-tier firewall policy + +## Status + +These remain partially implemented and must not be represented as fully complete without separate evidence. 
+ +--- + +# SECTION 10 — GOVERNANCE ARCHITECTURE + +## Target + +- validator governance across multiple entities +- admission control +- key rotation +- emergency controls + +## Current state + +- Chain 138 validator topology exists +- final multi-entity validator governance assignment is still pending + +This section remains a target architecture section, not a statement of fully executed governance. + +--- + +# SECTION 11 — FIREFLY WORKFLOW ARCHITECTURE + +## Current state + +- FireFly primary footprint exists and now exposes a local API again. +- Current restored `6200` configuration is a minimal local gateway profile for stability and API availability. +- Full multiparty FireFly workflow behavior across blockchain, shared storage, and data exchange is not yet evidenced as healthy in the current container deployment. + +## Program objective + +Use FireFly as the workflow layer only after: + +1. primary and secondary footprint are clearly defined +2. connector/plugin model is explicit +3. upstream blockchain and shared-storage dependencies are validated + +--- + +# SECTION 12 — CROSS-CHAIN INTEROPERABILITY DESIGN + +## Current state + +- CCIP relay and Chain 138 cross-chain infrastructure exist in the broader stack. +- Hyperledger Cacti is not currently proven as the live interoperability engine for DBIS in this environment. + +## Planning rule + +This plan must refer to Cacti as `future / optional` until a deployed and validated Cacti environment is evidenced in discovery artifacts. + +--- + +# SECTION 13 — DEVSECOPS PIPELINE + +## Required execution model + +1. Source control +2. Build / validation +3. Security and config review +4. Service verification +5. Deployment +6. 
Monitoring and readiness evidence + +## Repo-backed implementation + +- discovery scripts +- RPC health checks +- route / explorer verification +- operator runbooks +- submodule hygiene and deployment docs + +The pipeline is partially implemented via scripts and runbooks; it is not yet a single unified CI/CD system for every DBIS workload. + +--- + +# SECTION 14 — PERFORMANCE VALIDATION + +## Current state + +- Hyperledger Caliper is not vendored in this repo. +- A documented performance hook exists instead of a committed benchmark harness. + +## Canonical artifact + +- [docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md](docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md) + +## Interpretation rule + +Performance benchmarking is planned and documented, but not yet a routine automated readiness gate. + +--- + +# SECTION 15 — MONITORING AND OBSERVABILITY + +## Deployed now + +- Explorer / Blockscout +- Besu RPC health verification +- operational checks and route verification scripts + +## Partially deployed / under validation + +- Hyperledger-side service health beyond CT status +- unified status reporting for the broader DLT stack + +--- + +# SECTION 16 — DISASTER RECOVERY DESIGN + +## Target state + +- RPO / RTO by workload tier +- cross-site replication +- cold / standby recovery paths + +## Current state + +DR remains a program requirement, not a fully evidenced completed deployment capability. 
+ +--- + +# SECTION 17 — PRODUCTION DEPLOYMENT WORKFLOW + +## Phase 1 — Reality mapping + +Canonical implementation: + +- [scripts/verify/run-phase1-discovery.sh](scripts/verify/run-phase1-discovery.sh) +- [docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md) + +## Phase 2 — Sovereignization roadmap + +Canonical implementation: + +- [docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md](docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md) + +## Phase 3 — Liveness and production-simulation wrapper + +Canonical implementation: + +- [scripts/verify/run-dbis-phase3-e2e-simulation.sh](scripts/verify/run-dbis-phase3-e2e-simulation.sh) +- [docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md](docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md) + +--- + +# SECTION 18 — END-TO-END PRODUCTION FLOW + +## Reference flow + +1. Identity issued +2. Credential verified +3. Workflow triggered +4. Settlement executed +5. Cross-chain sync +6. Compliance recorded +7. Final settlement confirmed + +## Current interpretation + +This is the target business flow. Current automation verifies only selected infrastructure slices of that flow: + +- Besu liveness +- optional FireFly HTTP +- operator-guided manual follow-ups for Indy / Fabric / CCIP + +It must not be represented as fully automated end-to-end execution today. + +--- + +# SECTION 19 — EXECUTION DIRECTIVES + +Cursor / operators should execute the following in order: + +1. Run Phase 1 discovery and review the critical failure summary. +2. Reconcile node-role matrix conflicts, especially duplicate IP planning entries. +3. Validate live Hyperledger CTs at the app layer, not only CT status. +4. Track sovereignization gaps in the Phase 2 roadmap. +5. Run the Phase 3 liveness wrapper and manual follow-ups. +6. Produce or refresh readiness evidence. + +These directives must map to repo scripts and docs, not hypothetical tooling. 
+ +--- + +# SECTION 20 — EXPECTED DELIVERABLES + +The executable deliverables in this repository are: + +1. Infrastructure inventory report +2. Node role assignment map +3. Phase 2 sovereignization roadmap +4. Phase 3 liveness simulation runbook +5. Caliper performance hook +6. Operator readiness checklist + +Separate security compliance and benchmark reports remain future deliverables unless explicitly generated. + +--- + +# SECTION 21 — CURRENT GAPS + +## Infrastructure gaps + +- FireFly secondary `6201` is currently stopped and should be treated as standby / incomplete until intentionally reactivated. +- Fabric CTs are present, but current app-level verification does not yet prove active Fabric peer or orderer services. +- Indy CTs are present, but current app-level verification does not yet prove active Indy validator listeners. +- The current per-node app-level evidence table is maintained in [docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md](docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md). + +## Platform gaps + +- Ceph-backed distributed storage is still roadmap work. +- Full VLAN / sovereign network segmentation is still roadmap work. +- Final entity ownership assignments remain incomplete. + +## Planning gaps + +- Future-state architecture items must remain clearly labeled as planned, not deployed. 
+ +--- + +# SECTION 22 — IMPLEMENTATION ARTIFACTS + +Executable counterparts in this repository: + +| Deliverable | Location | +|-------------|----------| +| Node Role Matrix | `docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md` | +| Phase 1 discovery | `scripts/verify/run-phase1-discovery.sh`, `docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md`, `reports/phase1-discovery/` | +| Phase 2 roadmap | `docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md` | +| Phase 3 liveness wrapper | `scripts/verify/run-dbis-phase3-e2e-simulation.sh`, `docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md` | +| Caliper hook | `docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md`, `scripts/verify/print-caliper-chain138-stub.sh` | +| Operator readiness checklist | `docs/00-meta/OPERATOR_READY_CHECKLIST.md` section 10 | diff --git a/docs/00-meta/OPERATOR_READY_CHECKLIST.md b/docs/00-meta/OPERATOR_READY_CHECKLIST.md index 6c9c823..22b9a00 100644 --- a/docs/00-meta/OPERATOR_READY_CHECKLIST.md +++ b/docs/00-meta/OPERATOR_READY_CHECKLIST.md @@ -1,6 +1,6 @@ # Operator Ready Checklist — Copy-Paste Commands -**Last Updated:** 2026-03-27 +**Last Updated:** 2026-03-28 **Purpose:** Single page with exact commands to complete every pending todo. Run from **repo root** on a host with **LAN** access (and `smom-dbis-138/.env` with `PRIVATE_KEY`, `NPM_PASSWORD` where noted). **Do you have all necessary creds?** See [OPERATOR_CREDENTIALS_CHECKLIST.md](OPERATOR_CREDENTIALS_CHECKLIST.md) — per-task list of LAN, PRIVATE_KEY, NPM_PASSWORD, RPC_URL_138, SSH, LINK, gas, token balance. @@ -276,6 +276,21 @@ This is intentionally deferred with the rest of the Wemix path. If the chain is --- +## 10. 
DBIS Chain 138 — phased production path (matrix-driven) + +**Ref:** [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md), [DBIS_NODE_ROLE_MATRIX.md](../02-architecture/DBIS_NODE_ROLE_MATRIX.md) + +| Phase | Action | +|-------|--------| +| 1 — Reality mapping | `bash scripts/verify/run-phase1-discovery.sh` (optional: `HYPERLEDGER_PROBE=1`). Reports: `reports/phase1-discovery/`. Runbook: [PHASE1_DISCOVERY_RUNBOOK.md](../03-deployment/PHASE1_DISCOVERY_RUNBOOK.md). | +| 2 — Sovereignization roadmap | Read [DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md](../02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md); execute milestones (cluster expansion, Ceph, VLANs) as prioritized. | +| 3 — E2E simulation | `bash scripts/verify/run-dbis-phase3-e2e-simulation.sh` (optional: `RUN_CHAIN138_RPC_HEALTH=1`). Full flow + Indy/Fabric/CCIP manual steps: [DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md](../03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md). | +| Perf (Caliper) | `bash scripts/verify/print-caliper-chain138-stub.sh` — then [CALIPER_CHAIN138_PERF_HOOK.md](../03-deployment/CALIPER_CHAIN138_PERF_HOOK.md). | + +**Readiness:** Resolve critical **Entity owner** / **Region** **TBD** rows in the Node Role Matrix before claiming multi-entity production governance. + +--- + ## References - [COMPLETE_REQUIRED_OPTIONAL_RECOMMENDED_INDEX.md](COMPLETE_REQUIRED_OPTIONAL_RECOMMENDED_INDEX.md) — full plan (required, optional, recommended) diff --git a/docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md b/docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md new file mode 100644 index 0000000..5878c80 --- /dev/null +++ b/docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md @@ -0,0 +1,168 @@ +# DBIS Node Role Matrix + +**Last updated:** 2026-03-29 (UTC) — regenerate machine-derived rows: `bash scripts/docs/generate-dbis-node-role-matrix-md.sh` +**Status:** Active — infrastructure constitution for DBIS Chain 138 and colocated workloads. 
+ +## Purpose + +This matrix assigns **node type**, **preferred host placement**, **validator/signing role** (for Besu), and **security tier** per workload. It implements the entity-placement model in [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) (Sections 6–7) in a form operators can maintain. + +**Canonical pairs (keep in sync):** + +- Human detail and status: [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md) +- Machine-readable services: [config/proxmox-operational-template.json](../../config/proxmox-operational-template.json) + +When you change VMID, IP, hostname, or placement, update **ALL_VMIDS** and **operational-template.json** first, then regenerate the table below with this script (or edit the static sections manually). + +## Columns + +| Column | Meaning | +|--------|---------| +| **Entity owner** | DBIS Core, Central Bank, IFI, Regional Operator, etc. — use **TBD** until governance assigns. | +| **Region** | Geographic or site label — **TBD** until multi-site is formalized. | +| **IP note** | Flags duplicate IPv4 entries in the planning template. A duplicate means **shared or historical mapping**, not concurrent ownership — verify live owner in ALL_VMIDS or on-cluster. | +| **Preferred host** | Preferred Proxmox node (`r630-01`, `r630-02`, `ml110`, `any`). This is a planning target, not an assertion of current placement. | +| **Validator / signing** | For Chain 138 Besu: QBFT signer, sentry (no signer), RPC-only, or N/A. | +| **Security tier** | High-level zone: validator-tier, DMZ/RPC, edge ingress, identity/DLT, application, etc. 
| + +## Proxmox hypervisor nodes + +| Hostname | MGMT IP | Cluster | Role (summary) | +|----------|---------|---------|------------------| +| ml110 | 192.168.11.10 | h — verify | legacy_cluster_member_or_wan_aggregator | +| r630-01 | 192.168.11.11 | h | primary_compute_chain138_rpc_ccip_relay_sankofa | +| r630-02 | 192.168.11.12 | h | firefly_npmplus_secondary_mim4u_mifos_support | + +## Workloads (from operational template) + +Machine-derived rows below come from `services[]` in `config/proxmox-operational-template.json`. Duplicate IPv4 notes are warnings that the planning template still contains alternative or legacy ownership for the same address; they must not be read as concurrent live allocations. + +| VMID | Hostname | IPv4 | IP note | Node type | Entity owner | Region | Preferred host | Validator / signing | Security tier | +|------|----------|------|---------|-----------|--------------|--------|----------------|---------------------|---------------| +| — | order-redis-primary | 192.168.11.38 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 100 | proxmox-mail-gateway | 192.168.11.32 | unique in template | Infra LXC | TBD | TBD | r630-02 | N/A | management / secrets | +| 101 | proxmox-datacenter-manager | 192.168.11.33 | unique in template | Infra LXC | TBD | TBD | r630-02 | N/A | management / secrets | +| 102 | cloudflared | 192.168.11.34 | unique in template | Cloudflare tunnel | TBD | TBD | r630-01 | N/A | edge ingress | +| 103 | omada | 192.168.11.30 | unique in template | Infra LXC | TBD | TBD | r630-02 | N/A | management / secrets | +| 104 | gitea | 192.168.11.31 | unique in template | Infra LXC | TBD | TBD | r630-02 | N/A | management / secrets | +| 105 | nginxproxymanager | 192.168.11.26 | unique in template | Legacy NPM | TBD | TBD | r630-02 | N/A | standard internal | +| 130 | monitoring-1 | 192.168.11.27 | unique in template | Monitoring | TBD | TBD | r630-02 | N/A | standard internal | +| 1000 | 
besu-validator-1 | 192.168.11.100 | unique in template | Besu validator | TBD | TBD | r630-01 | QBFT signer | validator-tier | +| 1001 | besu-validator-2 | 192.168.11.101 | unique in template | Besu validator | TBD | TBD | r630-01 | QBFT signer | validator-tier | +| 1002 | besu-validator-3 | 192.168.11.102 | unique in template | Besu validator | TBD | TBD | r630-01 | QBFT signer | validator-tier | +| 1003 | besu-validator-4 | 192.168.11.103 | unique in template | Besu validator | TBD | TBD | r630-01 | QBFT signer | validator-tier | +| 1004 | besu-validator-5 | 192.168.11.104 | unique in template | Besu validator | TBD | TBD | r630-01 | QBFT signer | validator-tier | +| 1500 | besu-sentry-1 | 192.168.11.150 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1501 | besu-sentry-2 | 192.168.11.151 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1502 | besu-sentry-3 | 192.168.11.152 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1503 | besu-sentry-4 | 192.168.11.153 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1504 | besu-sentry-ali | 192.168.11.154 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1505 | besu-sentry-alltra-1 | 192.168.11.213 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1506 | besu-sentry-alltra-2 | 192.168.11.214 | unique in template | Besu sentry | TBD | TBD | r630-01 | Sentry (no signer) | validator-tier | +| 1507 | besu-sentry-hybx-1 | 192.168.11.244 | unique in template | Besu sentry | TBD | TBD | ml110 | Sentry (no signer) | validator-tier | +| 1508 | besu-sentry-hybx-2 | 192.168.11.245 | unique in template | Besu sentry | TBD | TBD | ml110 | Sentry (no signer) | validator-tier | +| 2101 | besu-rpc-core-1 | 192.168.11.211 | unique in 
template | Besu RPC (rpc_core) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2102 | besu-rpc-core-2 | 192.168.11.212 | unique in template | Besu RPC (rpc_core) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2103 | besu-rpc-core-thirdweb | 192.168.11.217 | unique in template | Besu RPC (rpc_core) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2201 | besu-rpc-public-1 | 192.168.11.221 | unique in template | Besu RPC (rpc_public) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2301 | besu-rpc-private-1 | 192.168.11.232 | unique in template | Besu RPC (rpc_private) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2303 | besu-rpc-ali-0x8a | 192.168.11.233 | unique in template | Besu RPC (rpc_named) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2304 | besu-rpc-ali-0x1 | 192.168.11.234 | unique in template | Besu RPC (rpc_named) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2305 | besu-rpc-luis-0x8a | 192.168.11.235 | unique in template | Besu RPC (rpc_named) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2306 | besu-rpc-luis-0x1 | 192.168.11.236 | unique in template | Besu RPC (rpc_named) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2307 | besu-rpc-putu-0x8a | 192.168.11.237 | unique in template | Besu RPC (rpc_named) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2308 | besu-rpc-putu-0x1 | 192.168.11.238 | unique in template | Besu RPC (rpc_named) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2400 | thirdweb-rpc-1 | 192.168.11.240 | unique in template | Besu RPC (rpc_thirdweb) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2401 | besu-rpc-thirdweb-0x8a-1 | 192.168.11.241 | unique in template | Besu RPC (rpc_thirdweb) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2402 | besu-rpc-thirdweb-0x8a-2 | 192.168.11.242 | unique in template | Besu RPC (rpc_thirdweb) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | 
+| 2403 | besu-rpc-thirdweb-0x8a-3 | 192.168.11.243 | unique in template | Besu RPC (rpc_thirdweb) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2500 | besu-rpc-alltra-1 | 192.168.11.172 | unique in template | Besu RPC (rpc_alltra_hybx) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2501 | besu-rpc-alltra-2 | 192.168.11.173 | unique in template | Besu RPC (rpc_alltra_hybx) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2502 | besu-rpc-alltra-3 | 192.168.11.174 | unique in template | Besu RPC (rpc_alltra_hybx) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2503 | besu-rpc-hybx-1 | 192.168.11.246 | unique in template | Besu RPC (rpc_alltra_hybx) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2504 | besu-rpc-hybx-2 | 192.168.11.247 | unique in template | Besu RPC (rpc_alltra_hybx) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 2505 | besu-rpc-hybx-3 | 192.168.11.248 | unique in template | Besu RPC (rpc_alltra_hybx) | TBD | TBD | r630-01 | RPC only | DMZ / RPC exposure | +| 3000 | ml-node-1 | 192.168.11.60 | unique in template | ML node | TBD | TBD | ml110 | N/A | standard internal | +| 3001 | ml-node-2 | 192.168.11.61 | unique in template | ML node | TBD | TBD | ml110 | N/A | standard internal | +| 3002 | ml-node-3 | 192.168.11.62 | unique in template | ML node | TBD | TBD | ml110 | N/A | standard internal | +| 3003 | ml-node-4 | 192.168.11.63 | unique in template | ML node | TBD | TBD | ml110 | N/A | standard internal | +| 3500 | oracle-publisher-1 | 192.168.11.29 | unique in template | Oracle publisher | TBD | TBD | r630-02 | N/A | standard internal | +| 3501 | ccip-monitor-1 | 192.168.11.28 | unique in template | CCIP monitor | TBD | TBD | r630-02 | N/A | standard internal | +| 5000 | blockscout-1 | 192.168.11.140 | unique in template | Blockscout | TBD | TBD | r630-01 | N/A | standard internal | +| 5010 | tsunamiswap | 192.168.11.91 | unique in template | DeFi | TBD | TBD | r630-01 | N/A | 
standard internal | +| 5200 | cacti-1 | 192.168.11.80 | unique in template | Cacti | TBD | TBD | r630-02 | N/A | standard internal | +| 5201 | cacti-alltra-1 | 192.168.11.177 | unique in template | Cacti | TBD | TBD | r630-02 | N/A | standard internal | +| 5202 | cacti-hybx-1 | 192.168.11.251 | unique in template | Cacti | TBD | TBD | r630-02 | N/A | standard internal | +| 5700 | dev-vm-gitops | 192.168.11.59 | unique in template | Dev | TBD | TBD | any | N/A | standard internal | +| 5702 | ai-inf-1 | 192.168.11.82 | unique in template | AI infra | TBD | TBD | r630-01 | N/A | standard internal | +| 5705 | ai-inf-2 | 192.168.11.86 | unique in template | AI infra | TBD | TBD | r630-01 | N/A | standard internal | +| 5800 | mifos-fineract | 192.168.11.85 | unique in template | Mifos | TBD | TBD | r630-02 | N/A | standard internal | +| 5801 | dapp-smom | 192.168.11.58 | unique in template | DApp | TBD | TBD | r630-02 | N/A | standard internal | +| 6000 | fabric-1 | 192.168.11.65 | unique in template | Fabric | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6001 | fabric-alltra-1 | 192.168.11.178 | unique in template | Fabric | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6002 | fabric-hybx-1 | 192.168.11.252 | unique in template | Fabric | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6200 | firefly-1 | 192.168.11.35 | shared / non-concurrent mapping — verify live owner | FireFly | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6201 | firefly-ali-1 | 192.168.11.57 | unique in template | FireFly | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6400 | indy-1 | 192.168.11.64 | unique in template | Indy | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6401 | indy-alltra-1 | 192.168.11.179 | unique in template | Indy | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 6402 | indy-hybx-1 | 192.168.11.253 | unique in template | Indy | TBD | TBD | r630-02 | N/A | identity / workflow DLT | +| 7800 | 
sankofa-api-1 | 192.168.11.50 | unique in template | Sankofa / Phoenix | TBD | TBD | r630-01 | N/A | application | +| 7801 | sankofa-portal-1 | 192.168.11.51 | unique in template | Sankofa / Phoenix | TBD | TBD | r630-01 | N/A | application | +| 7802 | sankofa-keycloak-1 | 192.168.11.52 | unique in template | Sankofa / Phoenix | TBD | TBD | r630-01 | N/A | application | +| 7803 | sankofa-postgres-1 | 192.168.11.53 | unique in template | Sankofa / Phoenix | TBD | TBD | r630-01 | N/A | application | +| 7804 | gov-portals-dev | 192.168.11.54 | unique in template | Sankofa / Phoenix | TBD | TBD | r630-01 | N/A | application | +| 7805 | sankofa-studio | 192.168.11.72 | unique in template | Sankofa / Phoenix | TBD | TBD | r630-01 | N/A | application | +| 7810 | mim-web-1 | 192.168.11.37 | shared / non-concurrent mapping — verify live owner | MIM4U | TBD | TBD | r630-02 | N/A | standard internal | +| 7811 | mim-api-1 | 192.168.11.36 | shared / non-concurrent mapping — verify live owner | MIM4U | TBD | TBD | r630-02 | N/A | standard internal | +| 8640 | vault-phoenix-1 | 192.168.11.200 | unique in template | HashiCorp Vault | TBD | TBD | r630-01 | N/A | management / secrets | +| 8641 | vault-phoenix-2 | 192.168.11.215 | unique in template | HashiCorp Vault | TBD | TBD | r630-01 | N/A | management / secrets | +| 8642 | vault-phoenix-3 | 192.168.11.202 | unique in template | HashiCorp Vault | TBD | TBD | r630-01 | N/A | management / secrets | +| 10030 | order-identity | 192.168.11.40 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10040 | order-intake | 192.168.11.41 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10050 | order-finance | 192.168.11.49 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10060 | order-dataroom | 192.168.11.42 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10070 | order-legal | 
192.168.11.87 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10080 | order-eresidency | 192.168.11.43 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10090 | order-portal-public | 192.168.11.36 | shared / non-concurrent mapping — verify live owner | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10091 | order-portal-internal | 192.168.11.35 | shared / non-concurrent mapping — verify live owner | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10092 | order-mcp-legal | 192.168.11.37 | shared / non-concurrent mapping — verify live owner | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10100 | dbis-postgres-primary | 192.168.11.105 | unique in template | DBIS stack | TBD | TBD | r630-01 | N/A | application | +| 10101 | dbis-postgres-replica-1 | 192.168.11.106 | unique in template | DBIS stack | TBD | TBD | r630-01 | N/A | application | +| 10120 | dbis-redis | 192.168.11.125 | unique in template | DBIS stack | TBD | TBD | r630-01 | N/A | application | +| 10130 | dbis-frontend | 192.168.11.130 | unique in template | DBIS stack | TBD | TBD | r630-01 | N/A | application | +| 10150 | dbis-api-primary | 192.168.11.155 | unique in template | DBIS stack | TBD | TBD | r630-01 | N/A | application | +| 10151 | dbis-api-secondary | 192.168.11.156 | unique in template | DBIS stack | TBD | TBD | r630-01 | N/A | application | +| 10200 | order-prometheus | 192.168.11.46 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10201 | order-grafana | 192.168.11.47 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10202 | order-opensearch | 192.168.11.48 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10210 | order-haproxy | 192.168.11.39 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10230 | order-vault | 
192.168.11.55 | unique in template | The Order service | TBD | TBD | r630-01 | N/A | application | +| 10232 | ct10232 | 192.168.11.56 | unique in template | General CT | TBD | TBD | r630-01 | N/A | standard internal | +| 10233 | npmplus-primary | 192.168.11.167 | unique in template | NPMplus ingress | TBD | TBD | r630-01 | N/A | edge ingress | +| 10234 | npmplus-secondary | 192.168.11.168 | unique in template | NPMplus ingress | TBD | TBD | r630-02 | N/A | edge ingress | +| 10235 | npmplus-alltra-hybx | 192.168.11.169 | unique in template | NPMplus ingress | TBD | TBD | r630-02 | N/A | edge ingress | +| 10236 | npmplus-fourth-dev | 192.168.11.170 | unique in template | NPMplus ingress | TBD | TBD | r630-02 | N/A | edge ingress | +| 10237 | npmplus-mifos | 192.168.11.171 | unique in template | NPMplus ingress | TBD | TBD | r630-02 | N/A | edge ingress | + +## Supplementary rows (not in template JSON) + +These appear in [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md) but are not modeled as `services[]` entries in `proxmox-operational-template.json`. They are **manual supplements**, not generator-backed source of truth. 
+ +| VMID | Hostname | IPv4 | IP note | Node type | Entity owner | Region | Preferred host | Validator / signing | Security tier | +|------|----------|------|---------|-----------|--------------|--------|----------------|---------------------|---------------| +| 106 | redis-rpc-translator | 192.168.11.110 | manual supplement | RPC translator (Redis) | TBD | TBD | r630-01 (per ALL_VMIDS) | N/A | DMZ / RPC exposure | +| 107 | web3signer-rpc-translator | 192.168.11.111 | manual supplement | RPC translator (Web3Signer) | TBD | TBD | r630-01 | N/A | DMZ / RPC exposure | +| 108 | vault-rpc-translator | 192.168.11.112 | manual supplement | RPC translator (Vault) | TBD | TBD | r630-01 | N/A | management / secrets | + +## Host-level services (no VMID) + +| Name | Location | Node type | Notes | +|------|----------|-----------|-------| +| CCIP relay | r630-01 host `/opt/smom-dbis-138/services/relay` | Cross-chain relay | Uses RPC (e.g. VMID 2201); see [NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md), [docs/07-ccip/](../07-ccip/). 
| + +## Related + +- [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) +- [CHAIN138_CANONICAL_NETWORK_ROLES_VALIDATORS_SENTRY_AND_RPC.md](CHAIN138_CANONICAL_NETWORK_ROLES_VALIDATORS_SENTRY_AND_RPC.md) +- [VMID_ALLOCATION_FINAL.md](VMID_ALLOCATION_FINAL.md) + diff --git a/docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md b/docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md new file mode 100644 index 0000000..1826fc0 --- /dev/null +++ b/docs/02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md @@ -0,0 +1,69 @@ +# DBIS Phase 2 — Proxmox sovereignization roadmap + +**Last updated:** 2026-03-28 +**Purpose:** Close the gap between **today’s** Proxmox footprint (2–3 active cluster nodes, ZFS/LVM-backed guests, VLAN 11 LAN) and the **target** in [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) Sections 4–5 and 8 (multi-node HA, Ceph-backed storage, stronger segmentation, standardized templates). + +**Current ground truth:** [PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md](../03-deployment/PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md), [config/proxmox-operational-template.json](../../config/proxmox-operational-template.json), [STORAGE_GROWTH_AND_HEALTH.md](../04-configuration/STORAGE_GROWTH_AND_HEALTH.md). 
+ +--- + +## Current state (summary) + +| Area | As deployed (typical) | Master plan target | +|------|----------------------|-------------------| +| Cluster | Corosync cluster **h** on ml110 + r630-01 + r630-02 (ml110 **may** be repurposed — verify Phase 1) | 3+ control-oriented nodes, odd quorum, HA services | +| Storage | Local ZFS / LVM thin pools per host | Ceph OSD tier + pools for VM disks and/or RBD | +| Network | Primary **192.168.11.0/24**, VLAN 11, UDM Pro edge, NPMplus ingress | Additional VLANs: storage replication, validator-only, identity, explicit DMZ mapping | +| Workloads | Chain 138 Besu validators/RPC, Hyperledger CTs, apps — see [DBIS_NODE_ROLE_MATRIX.md](DBIS_NODE_ROLE_MATRIX.md) | Same roles, **template-standardized** provisioning | + +--- + +## Milestone 1 — Cluster quorum and fleet expansion + +- Bring **r630-03+** online per [R630_13_NODE_DOD_HA_MASTER_PLAN.md](R630_13_NODE_DOD_HA_MASTER_PLAN.md) and [11-references/13_NODE_AND_ASSETS_BRING_ONLINE_CHECKLIST.md](../11-references/13_NODE_AND_ASSETS_BRING_ONLINE_CHECKLIST.md). +- Maintain **odd** node count for Corosync quorum; use qdevice if temporarily even-count during ml110 migration ([UDM_PRO_PROXMOX_CLUSTER.md](../04-configuration/UDM_PRO_PROXMOX_CLUSTER.md)). + +--- + +## Milestone 2 — ML110 migration / WAN aggregator + +- **Before** repurposing ml110 to OPNsense/pfSense ([ML110_OPNSENSE_PFSENSE_WAN_AGGREGATOR.md](../11-references/ML110_OPNSENSE_PFSENSE_WAN_AGGREGATOR.md)): migrate all remaining CT/VM to R630s ([NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md)). +- Re-document **physical inventory** row for `.10` after cutover ([PHYSICAL_HARDWARE_INVENTORY.md](PHYSICAL_HARDWARE_INVENTORY.md)). 
+ +--- + +## Milestone 3 — Ceph introduction (decision + prerequisites) + +- **Decision record:** whether Ceph replaces or complements ZFS/LVM for new workloads; minimum network (10G storage net, jumbo frames if used), disk layout, and JBOD attachment per [HARDWARE_INVENTORY_MASTER.md](../11-references/HARDWARE_INVENTORY_MASTER.md). +- Pilot: non-production pool → migrate one test CT → expand OSD count. + +--- + +## Milestone 4 — Network segmentation (incremental) + +Map master plan layers to implementable steps: + +1. Dedicated **storage replication** VLAN (Ceph backhaul or ZFS sync). +2. **Validator / P2P** constraints (firewall rules between sentry and RPC tiers — align [CHAIN138_CANONICAL_NETWORK_ROLES_VALIDATORS_SENTRY_AND_RPC.md](CHAIN138_CANONICAL_NETWORK_ROLES_VALIDATORS_SENTRY_AND_RPC.md)). +3. **Identity / Indy** tier isolation when multi-entity governance requires it. + +--- + +## Milestone 5 — VM / CT templates (Section 7 of master plan) + +- Align [PROXMOX_VM_CREATION_RUNBOOK.md](../03-deployment/PROXMOX_VM_CREATION_RUNBOOK.md) with template types: Identity (Indy/Aries), Settlement (Besu), Institutional (Fabric), Workflow (FireFly), Observability (Explorer/monitoring). +- Encode **preferred_node** and sizing in [DBIS_NODE_ROLE_MATRIX.md](DBIS_NODE_ROLE_MATRIX.md) and sync [proxmox-operational-template.json](../../config/proxmox-operational-template.json). + +--- + +## Milestone 6 — Backup and DR alignment (master plan Sections 8, 16) + +- Hourly/daily snapshot policy per guest tier; cross-site replication targets (RPO/RTO) documented outside this file when available. +- Reference: existing backup scripts for NPMplus and operator checklist. 
+ +--- + +## Related + +- [PHASE1_DISCOVERY_RUNBOOK.md](../03-deployment/PHASE1_DISCOVERY_RUNBOOK.md) +- [DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md](../03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md) diff --git a/docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md b/docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md index c9608e3..f2ebfa0 100644 --- a/docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md +++ b/docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md @@ -1,7 +1,7 @@ # Physical Hardware Inventory -**Last Updated:** 2026-02-13 -**Document Version:** 1.1 +**Last Updated:** 2026-03-28 +**Document Version:** 1.2 **Status:** Active Documentation --- @@ -14,12 +14,12 @@ This document is the placeholder for the physical hardware inventory (hosts, IPs | Host | IP | Role | NICs | |------|-----|------|------| -| ml110 | 192.168.11.10 | Proxmox, Besu nodes | 2× Broadcom BCM5717 1GbE | -| r630-01 | 192.168.11.11 | Infrastructure, RPC | 4× Broadcom BCM5720 1GbE | -| r630-02 | 192.168.11.12 | Firefly, NPMplus secondary | 4× Broadcom BCM57800 1/10GbE | +| ml110 | 192.168.11.10 | **Transitional:** historically Proxmox + Besu/sentry/ML workloads; **target** is OPNsense/pfSense WAN aggregator between cable modems and dual UDM Pro — see [NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md). Confirm live role with `pvecm status` / `pct list` on `.10` (Phase 1: `scripts/verify/run-phase1-discovery.sh`). 
| 2× Broadcom BCM5717 1GbE | +| r630-01 | 192.168.11.11 | Infrastructure, Chain 138 RPC, Sankofa/Order, CCIP relay host | 4× Broadcom BCM5720 1GbE | +| r630-02 | 192.168.11.12 | FireFly, Fabric/Indy/Cacti, NPMplus instances, MIM4U, Mifos | 4× Broadcom BCM57800 1/10GbE | | UDM Pro (edge) | 76.53.10.34 | Edge router | — | -**See:** [PROXMOX_HOSTS_COMPLETE_HARDWARE_CONFIG.md](PROXMOX_HOSTS_COMPLETE_HARDWARE_CONFIG.md), [NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md), [NETWORK_ARCHITECTURE.md](NETWORK_ARCHITECTURE.md), [VMID_ALLOCATION_FINAL.md](VMID_ALLOCATION_FINAL.md). +**See:** [PROXMOX_HOSTS_COMPLETE_HARDWARE_CONFIG.md](PROXMOX_HOSTS_COMPLETE_HARDWARE_CONFIG.md), [NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md), [NETWORK_ARCHITECTURE.md](NETWORK_ARCHITECTURE.md), [VMID_ALLOCATION_FINAL.md](VMID_ALLOCATION_FINAL.md), [DBIS_NODE_ROLE_MATRIX.md](DBIS_NODE_ROLE_MATRIX.md). --- diff --git a/docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md b/docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md new file mode 100644 index 0000000..eb100e0 --- /dev/null +++ b/docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md @@ -0,0 +1,23 @@ +# Caliper performance hook — Chain 138 (Besu) + +**Last updated:** 2026-03-28 +**Purpose:** Satisfy [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) Section 14 without vendoring Caliper into this repository. + +## Approach + +1. Use upstream [Hyperledger Caliper](https://github.com/hyperledger/caliper) (npm package `@hyperledger/caliper-cli`). +2. Create a **separate** working directory (or CI job) with: + - `networkconfig.json` pointing `url` to Chain 138 HTTP RPC (prefer an isolated load-test node, not production public RPC). + - `benchmarks/` with a minimal `read` workload (`eth_blockNumber`, `eth_getBlockByNumber`) before write-heavy contracts. +3. Run: `npx caliper launch manager --caliper-workspace . 
--caliper-networkconfig networkconfig.json --caliper-benchconfig benchmarks/config.yaml` +4. Archive results (HTML/JSON) next to Phase 1 discovery reports if desired: `reports/phase1-discovery/` or `reports/caliper/`. + +## Safety + +- Use **low** transaction rates first; Besu validators and RPC tier are production assets. +- Do not point Caliper at **validator** JSON-RPC ports; use **RPC tier** only. +- Align gas and chain ID with `smom-dbis-138/.env` and [DEPLOYMENT_ORDER_OF_OPERATIONS.md](DEPLOYMENT_ORDER_OF_OPERATIONS.md). + +## Wrapper + +`bash scripts/verify/print-caliper-chain138-stub.sh` prints this path and suggested env vars (no network I/O). diff --git a/docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md b/docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md new file mode 100644 index 0000000..09e1482 --- /dev/null +++ b/docs/03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md @@ -0,0 +1,66 @@ +# DBIS Hyperledger Runtime Status + +**Last Reviewed:** 2026-03-28 +**Purpose:** Concise app-level status table for the non-Besu Hyperledger footprint currently hosted on Proxmox. This complements the VMID inventory and discovery runbooks by recording what was actually verified inside the running containers. 
+ +## Scope + +This document summarizes the latest operator verification for: + +- FireFly CTs: `6200`, `6201` +- Fabric CTs: `6000`, `6001`, `6002` +- Indy CTs: `6400`, `6401`, `6402` + +The checks were based on: + +- `pct status` +- in-container process checks +- in-container listener checks +- FireFly API / Postgres / IPFS checks where applicable + +## Current status table + +| VMID | Service family | CT status | App-level status | Listening ports / probe | Notes | +|------|----------------|-----------|------------------|--------------------------|-------| +| `6200` | FireFly primary | Running | Healthy minimal local gateway | `5000/tcp` FireFly API, `5432/tcp` Postgres, `5001/tcp` IPFS | `firefly-core` restored on `ghcr.io/hyperledger/firefly:v1.2.0`; `GET /api/v1/status` returned `200`; Postgres `pg_isready` passed; IPFS version probe passed | +| `6201` | FireFly secondary | Stopped | Standby / incomplete | None verified | CT exists but rootfs is effectively empty and no valid FireFly deployment footprint was found; do not treat as active secondary | +| `6000` | Fabric primary | Running | Unproven | No Fabric listener verified | CT runs, but current app-level checks did not show active peer/orderer processes or expected listeners such as `7050` / `7051` | +| `6001` | Fabric secondary | Running | Unproven | No Fabric listener verified | Same current state as `6000` | +| `6002` | Fabric tertiary | Running | Unproven | No Fabric listener verified | Same current state as `6000` | +| `6400` | Indy primary | Running | Unproven | No Indy listener verified | CT runs, but current checks did not show Indy node listeners on expected ports such as `9701`-`9708` | +| `6401` | Indy secondary | Running | Unproven | No Indy listener verified | Same current state as `6400` | +| `6402` | Indy tertiary | Running | Unproven | No Indy listener verified | Same current state as `6400` | + +## Interpretation + +### Confirmed working now + +- FireFly primary (`6200`) is restored 
enough to provide a working local FireFly API backed by Postgres and IPFS. + +### Present but not currently proved as active application workloads + +- Fabric CTs (`6000`-`6002`) +- Indy CTs (`6400`-`6402`) + +These should be described as container footprints under validation, not as fully verified production application nodes, until app-level services and expected listeners are confirmed. + +### Not currently active + +- FireFly secondary (`6201`) should be treated as standby or incomplete deployment state unless it is intentionally rebuilt and verified. + +## Operational follow-up + +1. Keep `6200` under observation and preserve its working config/image path. +2. Do not force `6201` online unless its intended role and deployment assets are re-established. +3. For Fabric and Indy, the next verification step is app-native validation, not more CT-level checks. +4. Any governance or architecture document should distinguish: + - `deployed and app-healthy` + - `container present only` + - `planned / aspirational` + +## Related artifacts + +- [docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md](../02-architecture/DBIS_NODE_ROLE_MATRIX.md) +- [docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](PHASE1_DISCOVERY_RUNBOOK.md) +- [docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md](DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md) +- [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) diff --git a/docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md b/docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md new file mode 100644 index 0000000..cfd399f --- /dev/null +++ b/docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md @@ -0,0 +1,76 @@ +# DBIS Phase 3 — End-to-end production simulation + +**Last updated:** 2026-03-28 +**Purpose:** Operationalize [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) Section 18 (example flow) and Sections 14, 17 as 
**repeatable liveness and availability checks** — not a single product build or a full business-E2E execution harness. + +**Prerequisites:** LAN access where noted; [DBIS_NODE_ROLE_MATRIX.md](../02-architecture/DBIS_NODE_ROLE_MATRIX.md) for IPs/VMIDs; operator env via `scripts/lib/load-project-env.sh` for on-chain steps. + +--- + +## Section 18 flow → concrete checks + +| Step | Master plan | Verification (repo-aligned) | +|------|-------------|-----------------------------| +| 1 | Identity issued (Indy) | Indy steward / node RPC on VMID **6400** (192.168.11.64); pool genesis tools — **manual** until automated issuer script exists. Current CTs `6400/6401/6402` are present, but app-level Indy listener verification is still pending. | +| 2 | Credential verified (Aries) | Aries agents (if colocated): confirm stack on Indy/FireFly integration path — **TBD** per deployment. | +| 3 | Workflow triggered (FireFly) | FireFly API on **6200** (currently restored as a minimal local gateway profile at `http://192.168.11.35:5000`). VMID **6201** is presently stopped / standby and should not be assumed active. | +| 4 | Settlement executed (Besu) | JSON-RPC `eth_chainId`, `eth_blockNumber`, optional test transaction via `smom-dbis-138` with `RPC_URL_138=http://192.168.11.211:8545`. PMM/oracle: [ORACLE_AND_KEEPER_CHAIN138.md](../../smom-dbis-138/docs/integration/ORACLE_AND_KEEPER_CHAIN138.md). | +| 5 | Cross-chain sync (Cacti) | Cacti = network monitoring here (VMID **5200**); **Hyperledger Cacti** interoperability is **future/optional** — track separately if deployed. **CCIP:** relay on r630-01 per [CCIP_RELAY_DEPLOYMENT.md](../07-ccip/CCIP_RELAY_DEPLOYMENT.md). | +| 6 | Compliance recorded (Fabric) | Fabric CTs `6000/6001/6002` are present, but current app-level verification has not yet proven active peer / orderer workloads inside those CTs. Treat Fabric business-flow validation as manual until that gap is closed. 
| +| 7 | Final settlement confirmed | Re-check Besu head on **2101** and **2201**; Blockscout **5000** for tx receipt if applicable. | + +--- + +## Automated wrapper (partial) + +From repo root: + +```bash +bash scripts/verify/run-dbis-phase3-e2e-simulation.sh +``` + +Optional: + +```bash +RUN_CHAIN138_RPC_HEALTH=1 bash scripts/verify/run-dbis-phase3-e2e-simulation.sh +``` + +The script **does not** replace Indy/Fabric business transactions; it proves **liveness** of RPC, optional FireFly HTTP, and prints manual follow-ups. Treat it as a wrapper for infrastructure availability, not as proof that the complete seven-step business flow succeeded. + +--- + +## Performance slice (Section 14 — Caliper) + +Hyperledger Caliper is **not** vendored in this repo. To add benchmarks: + +1. Install Caliper in a throwaway directory or CI image. +2. Point a Besu **SUT** at `http://192.168.11.211:8545` (deploy/core RPC only) or a dedicated load-test RPC. +3. Start with `simple` contract scenarios; record **TPS**, **latency p95**, and **error rate**. + +**Suggested initial thresholds (tune per governance):** + +| Metric | Initial gate (lab) | +|--------|-------------------| +| RPC error rate under steady load | less than 1% for 5 min | +| Block production | no stall > 30s (QBFT) | +| Public RPC `eth_blockNumber` lag vs core | within documented spread ([check-chain138-rpc-health.sh](../../scripts/verify/check-chain138-rpc-health.sh) defaults) | + +Details: [CALIPER_CHAIN138_PERF_HOOK.md](CALIPER_CHAIN138_PERF_HOOK.md). + +--- + +## Production readiness certification (matrix-driven) + +Use [OPERATOR_READY_CHECKLIST.md](../00-meta/OPERATOR_READY_CHECKLIST.md) section **10** plus: + +- Phase 1 report timestamped under `reports/phase1-discovery/`. +- Phase 2 milestones acknowledged (Ceph/segmentation may be partial). +- Node Role Matrix: no critical **TBD** for entity-owned validators without a documented interim owner. 
+ +--- + +## Related + +- [PHASE1_DISCOVERY_RUNBOOK.md](PHASE1_DISCOVERY_RUNBOOK.md) +- [DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md](../02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md) +- [verify-end-to-end-routing.sh](../../scripts/verify/verify-end-to-end-routing.sh) — public/private ingress diff --git a/docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md b/docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md new file mode 100644 index 0000000..33e7e5f --- /dev/null +++ b/docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md @@ -0,0 +1,119 @@ +# Phase 1 — Reality mapping runbook + +**Last updated:** 2026-03-28 +**Purpose:** Operational steps for [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) Sections 3 and 19.1–19.3: inventory Proxmox, Besu, optional Hyperledger CTs, and record dependency context. + +**Outputs:** Timestamped report under `reports/phase1-discovery/` (created by the orchestrator script). + +**Pass / fail semantics:** the orchestrator still writes a full evidence report when a critical section fails, but it now exits **non-zero** and appends a final **Critical failure summary** section. Treat the markdown as evidence capture, not automatic proof of success. + +--- + +## Prerequisites + +- Repo root; `jq` recommended for template audit. +- **LAN:** SSH keys to Proxmox nodes (default `192.168.11.10`, `.11`, `.12` from `config/ip-addresses.conf`). +- Optional: `curl` for RPC probe. + +--- + +## One-command orchestrator + +```bash +bash scripts/verify/run-phase1-discovery.sh +``` + +Optional Hyperledger container smoke checks (SSH to r630-02, `pct exec`): + +```bash +HYPERLEDGER_PROBE=1 bash scripts/verify/run-phase1-discovery.sh +``` + +Each run writes: + +- `reports/phase1-discovery/phase1-discovery-YYYYMMDD_HHMMSS.md` — human-readable report with embedded diagram and command output. +- `reports/phase1-discovery/phase1-discovery-YYYYMMDD_HHMMSS.log` — same content log mirror. 
+ +Critical sections for exit status: + +- Proxmox template audit +- `pvecm` / `pvesm` / `pct list` / `qm list` +- Chain 138 core RPC quick probe +- `check-chain138-rpc-health.sh` +- `verify-besu-enodes-and-ips.sh` +- optional Hyperledger CT probe when `HYPERLEDGER_PROBE=1` + +See also `reports/phase1-discovery/README.md`. + +--- + +## Dependency graph (logical) + +Ingress → RPC/sentries/validators → explorer; CCIP relay on r630-01 uses public RPC; FireFly/Fabric/Indy are optional DLT sides for the Section 18 flow. + +```mermaid +flowchart TB + subgraph edge [EdgeIngress] + CF[Cloudflare_DNS] + NPM[NPMplus_LXC] + end + subgraph besu [Chain138_Besu] + RPCpub[RPC_public_2201] + RPCcore[RPC_core_2101] + Val[Validators_1000_1004] + Sen[Sentries_1500_1508] + end + subgraph observe [Observability] + BS[Blockscout_5000] + end + subgraph relay [CrossChain] + CCIP[CCIP_relay_r63001_host] + end + subgraph dlt [Hyperledger_optional] + FF[FireFly_6200_6201] + Fab[Fabric_6000_plus] + Indy[Indy_6400_plus] + end + CF --> NPM + NPM --> RPCpub + NPM --> RPCcore + NPM --> BS + RPCpub --> Sen + RPCcore --> Sen + Sen --> Val + CCIP --> RPCpub + FF --> Fab + FF --> Indy +``` + +**References:** [PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md](PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md), [ALL_VMIDS_ENDPOINTS.md](../04-configuration/ALL_VMIDS_ENDPOINTS.md), [NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md). 
+ +--- + +## Manual follow-ups + +| Task | Command / doc | +|------|----------------| +| Template vs live VMIDs | `bash scripts/verify/audit-proxmox-operational-template.sh` | +| Besu configs | `bash scripts/audit-besu-configs.sh` (review before running; LAN) | +| IP audit | `bash scripts/audit-all-vm-ips.sh` | +| Node role constitution | [DBIS_NODE_ROLE_MATRIX.md](../02-architecture/DBIS_NODE_ROLE_MATRIX.md) | + +--- + +## ML110 documentation reconciliation + +**Physical inventory** summary must match **live** role: + +- If `192.168.11.10` still runs **Proxmox** and hosts guests, state that explicitly. +- If migration to **OPNsense/pfSense WAN aggregator** is in progress or complete, align with [NETWORK_CONFIGURATION_MASTER.md](../11-references/NETWORK_CONFIGURATION_MASTER.md) and [PHYSICAL_HARDWARE_INVENTORY.md](../02-architecture/PHYSICAL_HARDWARE_INVENTORY.md). + +Use `pvecm status` and `pct list` on `.10` from the orchestrator output as evidence. + +--- + +## Related + +- [DBIS_NODE_ROLE_MATRIX.md](../02-architecture/DBIS_NODE_ROLE_MATRIX.md) +- [DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md](../02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md) +- [DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md](DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md) diff --git a/docs/04-configuration/RPC_ENDPOINTS_MASTER.md b/docs/04-configuration/RPC_ENDPOINTS_MASTER.md index 502f32d..f0c5900 100644 --- a/docs/04-configuration/RPC_ENDPOINTS_MASTER.md +++ b/docs/04-configuration/RPC_ENDPOINTS_MASTER.md @@ -31,6 +31,25 @@ This is the **authoritative source** for all RPC endpoint configurations. All ot - Set in `config/ip-addresses.conf` or `smom-dbis-138/.env`. In smom `.env`, **`RPC_URL`** is an accepted alias for **Core** and is normalized to `RPC_URL_138`. `CHAIN138_RPC_URL` / `CHAIN138_RPC` are derived from `RPC_URL_138`. `WS_URL_138_PUBLIC` is the WebSocket for Public (e.g. `ws://192.168.11.221:8546`). 
- **Core RPC (VMID 2101) for deploy:** Use **IP and port**, not FQDN. Set `RPC_URL_138=http://192.168.11.211:8545` in `smom-dbis-138/.env` for contract deployment and gas checks. Do not use `https://rpc-core.d-bis.org` for deployment (avoids DNS/tunnel dependency; direct IP is reliable from LAN). See [TODOS_CONSOLIDATED](../00-meta/TODOS_CONSOLIDATED.md) § First (0b). +### Public RPC capability baseline + +The public Chain 138 RPC tier is expected to provide the following wallet-grade baseline: + +- `eth_chainId` +- `eth_blockNumber` +- `eth_syncing` +- `eth_gasPrice` +- `eth_feeHistory` +- `eth_maxPriorityFeePerGas` +- `eth_estimateGas` +- `eth_getCode` +- `trace_block` +- `trace_replayBlockTransactions` + +Use [scripts/verify/check-chain138-rpc-health.sh](../../scripts/verify/check-chain138-rpc-health.sh) for the live health and capability probe. + +If `eth_maxPriorityFeePerGas` is missing, the first fix path is the public node version on VMID `2201`. Besu `24.7.0+` adds support for that method; use [upgrade-public-rpc-vmid2201.sh](../../scripts/besu/upgrade-public-rpc-vmid2201.sh) to perform the targeted public-RPC upgrade. + | Variable / use | Canonical value | Notes | |----------------|-----------------|--------| | **RPC_URL_138** (Core) | `http://192.168.11.211:8545` | **Prefer IP:port for admin/deploy.** Fallback from off-LAN: `https://rpc-core.d-bis.org` | diff --git a/docs/MASTER_INDEX.md b/docs/MASTER_INDEX.md index ab8b91a..532d565 100644 --- a/docs/MASTER_INDEX.md +++ b/docs/MASTER_INDEX.md @@ -1,6 +1,6 @@ # Documentation — Master Index -**Last Updated:** 2026-03-27 +**Last Updated:** 2026-03-28 **Purpose:** Single entry point for all project documentation. Use this index to find canonical sources and avoid deprecated or duplicate content. 
**Status:** Preflight and Chain 138 next steps completed (59/59 on-chain per [check-contracts-on-chain-138.sh](../../scripts/verify/check-contracts-on-chain-138.sh), 12 c* GRU-registered). **2026-03-06:** Contract check list expanded to 59 addresses (PMM, vault/reserve, CompliantFiatTokens); doc refs updated. **2026-03-04:** Celo CCIP bridges deployed; Phase A–D tracked in [03-deployment/REMAINING_DEPLOYMENTS_FOR_FULL_NETWORK_COVERAGE.md](03-deployment/REMAINING_DEPLOYMENTS_FOR_FULL_NETWORK_COVERAGE.md). Phase C: [PHASE_C_CW_AND_EDGE_POOLS_RUNBOOK.md](03-deployment/PHASE_C_CW_AND_EDGE_POOLS_RUNBOOK.md); Phase D: [PHASE_D_OPTIONAL_CHECKLIST.md](03-deployment/PHASE_D_OPTIONAL_CHECKLIST.md). **On-chain verification:** DODOPMMIntegration canonical cUSDT/cUSDC — [EXPLORER_TOKEN_LIST_CROSSCHECK](11-references/EXPLORER_TOKEN_LIST_CROSSCHECK.md) §8. **Remaining:** Wemix 0.4 WEMIX, LINK fund, cW* + edge pools — see [00-meta/TODOS_CONSOLIDATED.md](00-meta/TODOS_CONSOLIDATED.md). @@ -57,8 +57,8 @@ | Area | Index / key doc | |------|-----------------| | **00-meta** (tasks, next steps, phases) | [00-meta/NEXT_STEPS_INDEX.md](00-meta/NEXT_STEPS_INDEX.md), [00-meta/PHASES_AND_TASKS_MASTER.md](00-meta/PHASES_AND_TASKS_MASTER.md) | -| **02-architecture** | [02-architecture/](02-architecture/) — **Public sector + Phoenix catalog baseline:** [02-architecture/PUBLIC_SECTOR_TENANCY_MARKETPLACE_AND_DEPLOYMENT_BASELINE.md](02-architecture/PUBLIC_SECTOR_TENANCY_MARKETPLACE_AND_DEPLOYMENT_BASELINE.md); **non-goals (incl. 
catalog vs marketing §9):** [02-architecture/NON_GOALS.md](02-architecture/NON_GOALS.md) | -| **03-deployment** | [03-deployment/OPERATIONAL_RUNBOOKS.md](03-deployment/OPERATIONAL_RUNBOOKS.md), [03-deployment/DEPLOYMENT_ORDER_OF_OPERATIONS.md](03-deployment/DEPLOYMENT_ORDER_OF_OPERATIONS.md), **Public sector live checklist:** [03-deployment/PUBLIC_SECTOR_LIVE_DEPLOYMENT_CHECKLIST.md](03-deployment/PUBLIC_SECTOR_LIVE_DEPLOYMENT_CHECKLIST.md), **Proxmox VE ops template:** [03-deployment/PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md](03-deployment/PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md) · [`config/proxmox-operational-template.json`](config/proxmox-operational-template.json) | +| **02-architecture** | [02-architecture/](02-architecture/) — **Public sector + Phoenix catalog baseline:** [02-architecture/PUBLIC_SECTOR_TENANCY_MARKETPLACE_AND_DEPLOYMENT_BASELINE.md](02-architecture/PUBLIC_SECTOR_TENANCY_MARKETPLACE_AND_DEPLOYMENT_BASELINE.md); **non-goals (incl. catalog vs marketing §9):** [02-architecture/NON_GOALS.md](02-architecture/NON_GOALS.md); **DBIS Chain 138:** [dbis_chain_138_technical_master_plan.md](../dbis_chain_138_technical_master_plan.md), [02-architecture/DBIS_NODE_ROLE_MATRIX.md](02-architecture/DBIS_NODE_ROLE_MATRIX.md), [02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md](02-architecture/DBIS_PHASE2_PROXMOX_SOVEREIGNIZATION_ROADMAP.md) | +| **03-deployment** | [03-deployment/OPERATIONAL_RUNBOOKS.md](03-deployment/OPERATIONAL_RUNBOOKS.md), [03-deployment/DEPLOYMENT_ORDER_OF_OPERATIONS.md](03-deployment/DEPLOYMENT_ORDER_OF_OPERATIONS.md), **Public sector live checklist:** [03-deployment/PUBLIC_SECTOR_LIVE_DEPLOYMENT_CHECKLIST.md](03-deployment/PUBLIC_SECTOR_LIVE_DEPLOYMENT_CHECKLIST.md), **Proxmox VE ops template:** [03-deployment/PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md](03-deployment/PROXMOX_VE_OPERATIONAL_DEPLOYMENT_TEMPLATE.md) · [`config/proxmox-operational-template.json`](config/proxmox-operational-template.json); **DBIS 
Phase 1–3:** [03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](03-deployment/PHASE1_DISCOVERY_RUNBOOK.md), [03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md](03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md), [03-deployment/CALIPER_CHAIN138_PERF_HOOK.md](03-deployment/CALIPER_CHAIN138_PERF_HOOK.md), [03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md](03-deployment/DBIS_HYPERLEDGER_RUNTIME_STATUS.md) | | **04-configuration** | [04-configuration/README.md](04-configuration/README.md), [04-configuration/ADDITIONAL_PATHS_AND_EXTENSIONS.md](04-configuration/ADDITIONAL_PATHS_AND_EXTENSIONS.md) (paths, registry, token-mapping, LiFi/Jumper); **Chain 138 wallets:** [04-configuration/CHAIN138_WALLET_CONFIG_VALIDATION.md](04-configuration/CHAIN138_WALLET_CONFIG_VALIDATION.md); **Chain 2138 testnet wallets:** [04-configuration/CHAIN2138_WALLET_CONFIG_VALIDATION.md](04-configuration/CHAIN2138_WALLET_CONFIG_VALIDATION.md); **OMNL Indonesia / HYBX-BATCH-001:** [04-configuration/mifos-omnl-central-bank/HYBX_BATCH_001_OPERATOR_CHECKLIST.md](04-configuration/mifos-omnl-central-bank/HYBX_BATCH_001_OPERATOR_CHECKLIST.md), [04-configuration/mifos-omnl-central-bank/INDONESIA_PACKAGE_4_995_EVIDENCE_STANDARD.md](04-configuration/mifos-omnl-central-bank/INDONESIA_PACKAGE_4_995_EVIDENCE_STANDARD.md) | | **06-besu** | [06-besu/MASTER_INDEX.md](06-besu/MASTER_INDEX.md) | | **Testnet (2138)** | [testnet/DEFI_ORACLE_META_TESTNET_2138_RUNBOOK.md](testnet/DEFI_ORACLE_META_TESTNET_2138_RUNBOOK.md), [testnet/TESTNET_DEPLOYMENT.md](testnet/TESTNET_DEPLOYMENT.md) | diff --git a/explorer-monorepo b/explorer-monorepo index 630021c..3bca539 160000 --- a/explorer-monorepo +++ b/explorer-monorepo @@ -1 +1 @@ -Subproject commit 630021c04318f9db9e2623bee66d0d36841c17cd +Subproject commit 3bca5394fc25c30812f5883420553e7221eb9824 diff --git a/reports/phase1-discovery/README.md b/reports/phase1-discovery/README.md new file mode 100644 index 0000000..ada874d --- /dev/null +++ 
b/reports/phase1-discovery/README.md @@ -0,0 +1,7 @@ +# Phase 1 discovery reports + +Timestamped artifacts from `bash scripts/verify/run-phase1-discovery.sh`. + +- **Naming:** `phase1-discovery-YYYYMMDD_HHMMSS.md` (report) and `.log` (mirror). +- **Runbook:** [docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](../../docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md) +- **Git:** `reports/phase1-discovery/phase1-discovery-*.md` is listed in repo `.gitignore` (timestamped noise). `*.log` is already ignored globally. Commit only when you intentionally remove a report from the pattern or store evidence elsewhere. diff --git a/scripts/besu/upgrade-public-rpc-vmid2201.sh b/scripts/besu/upgrade-public-rpc-vmid2201.sh new file mode 100755 index 0000000..421922b --- /dev/null +++ b/scripts/besu/upgrade-public-rpc-vmid2201.sh @@ -0,0 +1,204 @@ +#!/usr/bin/env bash +# Upgrade the public Chain 138 RPC node (VMID 2201) to a Besu version that supports +# eth_maxPriorityFeePerGas. Default target is the current fleet baseline. +# +# Usage: +# bash scripts/besu/upgrade-public-rpc-vmid2201.sh +# bash scripts/besu/upgrade-public-rpc-vmid2201.sh --dry-run +# BESU_VERSION=25.12.0 bash scripts/besu/upgrade-public-rpc-vmid2201.sh + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true + +VMID="${RPC_VMID_2201:-2201}" +RPC_HOST="${RPC_VM_2201_HOST:-root@${PROXMOX_R630_02:-192.168.11.12}}" +[[ "$RPC_HOST" != *"@"* ]] && RPC_HOST="root@$RPC_HOST" + +BESU_VERSION="${BESU_VERSION:-25.12.0}" +BESU_TAR="besu-${BESU_VERSION}.tar.gz" +BESU_DIR="/opt/besu-${BESU_VERSION}" +DOWNLOAD_URL="${BESU_DOWNLOAD_URL:-https://github.com/hyperledger/besu/releases/download/${BESU_VERSION}/${BESU_TAR}}" +JAVA21_FALLBACK_URL="${JAVA21_FALLBACK_URL:-https://api.adoptium.net/v3/binary/latest/21/ga/linux/x64/jre/hotspot/normal/eclipse}" +RPC_HTTP_MAX_ACTIVE_CONNECTIONS="${RPC_HTTP_MAX_ACTIVE_CONNECTIONS:-256}" +RPC_WS_MAX_ACTIVE_CONNECTIONS="${RPC_WS_MAX_ACTIVE_CONNECTIONS:-256}" +LOCAL_CACHE="${LOCAL_CACHE:-/tmp}" +DRY_RUN=false +[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true + +SSH_OPTS=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=15) +RPC_IP="${RPC_PUBLIC_1:-192.168.11.221}" + +run_on_host() { + ssh "${SSH_OPTS[@]}" "$RPC_HOST" "$@" +} + +run_in_vmid() { + local cmd="$1" + if command -v pct >/dev/null 2>&1 && pct list 2>/dev/null | grep -q "^${VMID} "; then + pct exec "$VMID" -- bash -lc "$cmd" + else + run_on_host "pct exec ${VMID} -- bash -lc $(printf '%q' "$cmd")" + fi +} + +push_to_vmid() { + local src="$1" + local dest="$2" + if command -v pct >/dev/null 2>&1 && pct list 2>/dev/null | grep -q "^${VMID} "; then + pct push "$VMID" "$src" "$dest" + else + local host_tmp="/tmp/$(basename "$src")" + scp "${SSH_OPTS[@]}" "$src" "${RPC_HOST}:${host_tmp}" + run_on_host "pct push ${VMID} $(printf '%q' "$host_tmp") $(printf '%q' "$dest") && rm -f $(printf '%q' "$host_tmp")" + fi +} + +rpc_request() { + local method="$1" + local params="${2:-[]}" + curl -sS --max-time 20 -X POST "http://${RPC_IP}:8545" \ + -H "Content-Type: application/json" \ + -d "{\"jsonrpc\":\"2.0\",\"method\":\"${method}\",\"params\":${params},\"id\":1}" +} + +echo "==============================================" 
+echo "Upgrade public Chain 138 RPC (VMID ${VMID})" +echo "Host: ${RPC_HOST}" +echo "Target Besu version: ${BESU_VERSION}" +echo "==============================================" +if $DRY_RUN; then + echo "[dry-run] No changes will be made." +fi + +run_on_host "echo connected >/dev/null" + +run_in_vmid " + set -euo pipefail + if [[ ! -e /opt/besu ]]; then + fallback=\$(find /opt -maxdepth 1 -type d -name 'besu-*' | sort -V | tail -1) + if [[ -n \"\${fallback:-}\" ]]; then + ln -sfn \"\$fallback\" /opt/besu + chown -h besu:besu /opt/besu 2>/dev/null || true + fi + fi +" + +CURRENT_VERSION="$(run_in_vmid '/opt/besu/bin/besu --version 2>/dev/null || besu --version 2>/dev/null || true' | head -1 || true)" +JAVA_VERSION_RAW="$(run_in_vmid 'java -version 2>&1 | head -1' || true)" +echo "Current version: ${CURRENT_VERSION:-unknown}" +echo "Current Java: ${JAVA_VERSION_RAW:-unknown}" + +if $DRY_RUN; then + echo "[dry-run] Would download ${DOWNLOAD_URL}" + echo "[dry-run] Would stage ${BESU_TAR} in VMID ${VMID}, extract to ${BESU_DIR}, switch /opt/besu, restart besu-rpc." + exit 0 +fi + +mkdir -p "$LOCAL_CACHE" +if [[ ! -f "${LOCAL_CACHE}/${BESU_TAR}" ]]; then + echo "Downloading ${DOWNLOAD_URL} ..." + curl -fsSL -o "${LOCAL_CACHE}/${BESU_TAR}" "${DOWNLOAD_URL}" +fi + +echo "Pushing tarball into VMID ${VMID} ..." +push_to_vmid "${LOCAL_CACHE}/${BESU_TAR}" "/tmp/${BESU_TAR}" + +echo "Ensuring Java 21 runtime is present ..." 
+run_in_vmid " + set -euo pipefail + java_major=\$(java -version 2>&1 | sed -n '1s/.*version \"\\([0-9][0-9]*\\).*/\\1/p') + if [[ -z \"\${java_major:-}\" || \"\$java_major\" -lt 21 ]]; then + export DEBIAN_FRONTEND=noninteractive + apt-get update -qq + apt-get install -y -qq openjdk-21-jre-headless || true + java_major=\$(java -version 2>&1 | sed -n '1s/.*version \"\\([0-9][0-9]*\\).*/\\1/p') + if [[ -z \"\${java_major:-}\" || \"\$java_major\" -lt 21 ]]; then + command -v curl >/dev/null 2>&1 || apt-get install -y -qq curl ca-certificates + tmp_jre=/tmp/java21-jre.tar.gz + curl -fsSL -o \"\$tmp_jre\" '${JAVA21_FALLBACK_URL}' + tar -tzf \"\$tmp_jre\" > /tmp/java21-jre.list + extracted_dir=\$(head -1 /tmp/java21-jre.list | cut -d/ -f1) + rm -f /tmp/java21-jre.list + tar -xzf \"\$tmp_jre\" -C /opt + rm -f \"\$tmp_jre\" + ln -sfn \"/opt/\${extracted_dir}\" /opt/java-21 + update-alternatives --install /usr/bin/java java /opt/java-21/bin/java 2100 + fi + fi + config_file=\$(systemctl cat besu-rpc.service | sed -n 's/.*--config-file=\\([^ ]*\\).*/\\1/p' | tail -1) + if [[ -n \"\${config_file:-}\" && -f \"\$config_file\" ]]; then + find /etc/besu -maxdepth 1 -type f -name '*.toml' -print0 2>/dev/null | while IFS= read -r -d '' toml; do + sed -i \ + -e '/^[[:space:]]*miner-enabled[[:space:]]*=.*/d' \ + -e '/^[[:space:]]*privacy-enabled[[:space:]]*=.*/d' \ + \"\$toml\" + if grep -q '^rpc-http-enabled=true' \"\$toml\" && ! grep -q '^rpc-http-max-active-connections=' \"\$toml\"; then + tmp=\$(mktemp) + awk '1; /^rpc-http-port=/{print \"rpc-http-max-active-connections=${RPC_HTTP_MAX_ACTIVE_CONNECTIONS}\"}' \"\$toml\" > \"\$tmp\" + cat \"\$tmp\" > \"\$toml\" + rm -f \"\$tmp\" + fi + if grep -q '^rpc-ws-enabled=true' \"\$toml\" && ! 
grep -q '^rpc-ws-max-active-connections=' \"\$toml\"; then + tmp=\$(mktemp) + awk '1; /^rpc-ws-port=/{print \"rpc-ws-max-active-connections=${RPC_WS_MAX_ACTIVE_CONNECTIONS}\"}' \"\$toml\" > \"\$tmp\" + cat \"\$tmp\" > \"\$toml\" + rm -f \"\$tmp\" + fi + done + if ! grep -q '^data-storage-format=' \"\$config_file\"; then + tmp=\$(mktemp) + awk '1; /^sync-mode=/{print \"data-storage-format=\\\"FOREST\\\"\"}' \"\$config_file\" > \"\$tmp\" + cat \"\$tmp\" > \"\$config_file\" + rm -f \"\$tmp\" + fi + fi +" + +echo "Installing Besu ${BESU_VERSION} inside VMID ${VMID} ..." +run_in_vmid " + set -euo pipefail + cd /opt + if [[ -L /opt/besu ]]; then + current_target=\$(readlink -f /opt/besu) + current_version=\$(basename \"\$current_target\") + else + current_version=\$(/opt/besu/bin/besu --version 2>/dev/null | head -1 | sed -E 's#^.*/(v)?([0-9.]+).*\$#besu-\\2#') + [[ -z \"\$current_version\" ]] && current_version=besu-backup-pre-${BESU_VERSION} + mv /opt/besu \"/opt/\${current_version}\" + fi + rm -rf '${BESU_DIR}' + tar -xzf '/tmp/${BESU_TAR}' -C /opt + rm -f '/tmp/${BESU_TAR}' + ln -sfn '${BESU_DIR}' /opt/besu + chown -h besu:besu /opt/besu + chown -R besu:besu '${BESU_DIR}' /opt/besu-* 2>/dev/null || true +" + +echo "Restarting besu-rpc.service ..." +run_in_vmid "systemctl restart besu-rpc.service" +for _ in $(seq 1 24); do + ACTIVE_STATE="$(run_in_vmid 'systemctl is-active besu-rpc.service' || true)" + [[ "$ACTIVE_STATE" == "active" ]] && break + sleep 5 +done +NEW_VERSION="$(run_in_vmid '/opt/besu/bin/besu --version 2>/dev/null | grep -m1 "besu/" || true' | head -1 || true)" +echo "Service state: ${ACTIVE_STATE:-unknown}" +echo "New version: ${NEW_VERSION:-unknown}" + +echo "Verifying live RPC methods ..." 
+CHAIN_ID="$(rpc_request eth_chainId | jq -r '.result // empty' 2>/dev/null || true)" +PRIORITY_FEE="$(curl -sS --max-time 20 -X POST 'https://rpc-http-pub.d-bis.org' -H 'Content-Type: application/json' -d '{"jsonrpc":"2.0","method":"eth_maxPriorityFeePerGas","params":[],"id":1}' | jq -r '.result // empty' 2>/dev/null || true)" +TRACE_OK="$(rpc_request trace_block '["0x1"]' | jq -r 'has("result")' 2>/dev/null || true)" + +if [[ "$ACTIVE_STATE" != "active" || -z "$CHAIN_ID" || "$TRACE_OK" != "true" || -z "$PRIORITY_FEE" ]]; then + echo "ERROR: post-upgrade verification failed." + echo " eth_chainId result: ${CHAIN_ID:-missing}" + echo " trace_block result present: ${TRACE_OK:-false}" + echo " eth_maxPriorityFeePerGas result: ${PRIORITY_FEE:-missing}" + exit 1 +fi + +echo "OK: VMID ${VMID} upgraded successfully and public RPC now exposes eth_maxPriorityFeePerGas." diff --git a/scripts/docs/generate-dbis-node-role-matrix-md.sh b/scripts/docs/generate-dbis-node-role-matrix-md.sh new file mode 100755 index 0000000..a7f2ec9 --- /dev/null +++ b/scripts/docs/generate-dbis-node-role-matrix-md.sh @@ -0,0 +1,148 @@ +#!/usr/bin/env bash +# Regenerate docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md body tables from +# config/proxmox-operational-template.json (run from repo root). +set -euo pipefail +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +JSON="$ROOT/config/proxmox-operational-template.json" +OUT="$ROOT/docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md" + +if !
command -v jq &>/dev/null; then + echo "jq required" >&2 + exit 1 +fi + +TMP="$(mktemp)" +trap 'rm -f "$TMP"' EXIT + +jq -r ' +def vstatus: + if .category == "besu_validator" then "QBFT signer" + elif .category == "besu_sentry" then "Sentry (no signer)" + elif (.category | test("^rpc")) then "RPC only" + else "N/A" + end; +def ntype: + if .category == "besu_validator" then "Besu validator" + elif .category == "besu_sentry" then "Besu sentry" + elif .category == "rpc_core" or .category == "rpc_public" or .category == "rpc_private" or .category == "rpc_named" or .category == "rpc_thirdweb" or .category == "rpc_alltra_hybx" then "Besu RPC (\(.category))" + elif .category == "dlt" and (.hostname | test("fabric")) then "Fabric" + elif .category == "dlt" and (.hostname | test("indy")) then "Indy" + elif .category == "firefly" then "FireFly" + elif .category == "explorer" then "Blockscout" + elif .category == "npmplus" then "NPMplus ingress" + elif .category == "infra" then "Infra LXC" + elif .category == "monitoring" and (.hostname | test("cacti")) then "Cacti" + elif .category == "monitoring" then "Monitoring" + elif .category == "oracle" then "Oracle publisher" + elif .category == "ccip" then "CCIP monitor" + elif .category == "tunnel" then "Cloudflare tunnel" + elif .category == "ml" then "ML node" + elif .category == "vault" then "HashiCorp Vault" + elif .category == "order" then "The Order service" + elif .category == "sankofa_phoenix" then "Sankofa / Phoenix" + elif .category == "mim4u" then "MIM4U" + elif .category == "dbis" then "DBIS stack" + elif .category == "mifos" then "Mifos" + elif .category == "dapp" then "DApp" + elif .category == "dev" then "Dev" + elif .category == "ai_infra" then "AI infra" + elif .category == "defi" then "DeFi" + elif .category == "general" then "General CT" + elif .category == "legacy_proxy" then "Legacy NPM" + else .category + end; +def stier: + if .category == "besu_validator" or .category == "besu_sentry" then "validator-tier" + 
elif (.category | test("^rpc")) then "DMZ / RPC exposure" + elif .category == "npmplus" or .category == "tunnel" then "edge ingress" + elif .category == "dlt" or .category == "firefly" then "identity / workflow DLT" + elif .category == "vault" or .category == "infra" then "management / secrets" + elif .category == "order" or .category == "sankofa_phoenix" or .category == "dbis" then "application" + else "standard internal" + end; +([.services[] | select(.ipv4 != null) | .ipv4] | group_by(.) | map(select(length > 1) | .[0])) as $dup_ips +| .services[] +| (.ipv4) as $ip +| [(.vmid // "—"), .hostname, ($ip // "—"), (if ($ip != null and ($dup_ips | index($ip))) then "shared / non-concurrent mapping — verify live owner" else "unique in template" end), ntype, "TBD", "TBD", (.preferred_node // "—"), vstatus, stier] +| @tsv +' "$JSON" | sort -t$'\t' -k1,1n > "$TMP" + +UPDATED="$(date -u +%Y-%m-%d)" +{ + cat < "$OUT" + +echo "Wrote $OUT" diff --git a/scripts/upgrade-besu-all-nodes.sh b/scripts/upgrade-besu-all-nodes.sh index 4eba9ba..767fd30 100755 --- a/scripts/upgrade-besu-all-nodes.sh +++ b/scripts/upgrade-besu-all-nodes.sh @@ -1,52 +1,31 @@ #!/usr/bin/env bash -# Upgrade all Besu nodes to the latest (or specified) version. -# Requires: SSH to Proxmox host, curl/wget, enough disk in containers. +# Upgrade all running Besu containers to the requested version. +# Installs Java 21 where needed, preserves the previous /opt/besu-* directory for rollback, +# and restarts the detected Besu systemd unit in each container. 
+# # Usage: -# ./scripts/upgrade-besu-all-nodes.sh # upgrade to latest (25.12.0) -# ./scripts/upgrade-besu-all-nodes.sh --dry-run # show what would be done -# BESU_VERSION=25.11.0 ./scripts/upgrade-besu-all-nodes.sh -# Optional: pre-download to avoid long run (script uses $LOCAL_CACHE/besu-${BESU_VERSION}.tar.gz): -# curl -sSL -o /tmp/besu-25.12.0.tar.gz https://github.com/hyperledger/besu/releases/download/25.12.0/besu-25.12.0.tar.gz +# bash scripts/upgrade-besu-all-nodes.sh +# bash scripts/upgrade-besu-all-nodes.sh --dry-run +# BESU_VERSION=25.12.0 bash scripts/upgrade-besu-all-nodes.sh set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true -if [ -f "$PROJECT_ROOT/config/ip-addresses.conf" ]; then - # shellcheck source=../config/ip-addresses.conf - source "$PROJECT_ROOT/config/ip-addresses.conf" -fi - -PROXMOX_HOST="${PROXMOX_HOST:-${PROXMOX_HOST_ML110:-192.168.11.10}}" -# Latest stable as of 2025-12; EIP-7702 requires >= 24.1.0 BESU_VERSION="${BESU_VERSION:-25.12.0}" BESU_TAR="besu-${BESU_VERSION}.tar.gz" -BESU_DIR="besu-${BESU_VERSION}" +BESU_DIR="/opt/besu-${BESU_VERSION}" DOWNLOAD_URL="${BESU_DOWNLOAD_URL:-https://github.com/hyperledger/besu/releases/download/${BESU_VERSION}/${BESU_TAR}}" +JAVA21_FALLBACK_URL="${JAVA21_FALLBACK_URL:-https://api.adoptium.net/v3/binary/latest/21/ga/linux/x64/jre/hotspot/normal/eclipse}" +RPC_HTTP_MAX_ACTIVE_CONNECTIONS="${RPC_HTTP_MAX_ACTIVE_CONNECTIONS:-256}" +RPC_WS_MAX_ACTIVE_CONNECTIONS="${RPC_WS_MAX_ACTIVE_CONNECTIONS:-256}" LOCAL_CACHE="${LOCAL_CACHE:-/tmp}" - DRY_RUN=false -for arg in "$@"; do - [ "$arg" = "--dry-run" ] && DRY_RUN=true -done +[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true -# Same node list and services as restart-all-besu-services.sh -declare -A NODE_SERVICES=( - ["1000"]="besu-validator" - ["1001"]="besu-validator" - ["1002"]="besu-validator" - 
["1003"]="besu-validator" - ["1004"]="besu-validator" - ["1500"]="besu-sentry" - ["1501"]="besu-sentry" - ["1502"]="besu-sentry" - ["1503"]="besu-sentry" - ["2101"]="besu-rpc" - ["2400"]="besu-rpc" - ["2401"]="besu-rpc" - ["2402"]="besu-rpc" -) +SSH_OPTS=(-o ConnectTimeout=20 -o ServerAliveInterval=15 -o ServerAliveCountMax=3 -o StrictHostKeyChecking=accept-new) RED='\033[0;31m' GREEN='\033[0;32m' @@ -54,120 +33,245 @@ YELLOW='\033[1;33m' BLUE='\033[0;34m' NC='\033[0m' -log_info() { echo -e "${BLUE}[INFO]${NC} $1"; } -log_ok() { echo -e "${GREEN}[OK]${NC} $1"; } -log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } -log_err() { echo -e "${RED}[ERROR]${NC} $1"; } +log_info() { echo -e "${BLUE}[INFO]${NC} $1"; } +log_ok() { echo -e "${GREEN}[OK]${NC} $1"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +log_err() { echo -e "${RED}[ERROR]${NC} $1"; } -is_running() { - local vmid=$1 - ssh -o ConnectTimeout=3 -o StrictHostKeyChecking=accept-new root@"$PROXMOX_HOST" \ - "pct status $vmid 2>/dev/null" | grep -q running +declare -A HOST_BY_VMID +for v in 1000 1001 1002 1500 1501 1502 2101; do HOST_BY_VMID[$v]="${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"; done +for v in 2201 2303 2401; do HOST_BY_VMID[$v]="${PROXMOX_R630_02:-${PROXMOX_HOST_R630_02:-192.168.11.12}}"; done +for v in 1003 1004 1503 1504 1505 1506 1507 1508 2102 2301 2304 2305 2306 2307 2308 2400 2402 2403; do HOST_BY_VMID[$v]="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"; done + +BESU_VMIDS=( + 1000 1001 1002 1003 1004 + 1500 1501 1502 1503 1504 1505 1506 1507 1508 + 2101 2102 2201 2301 2303 2304 2305 2306 2307 2308 + 2400 2401 2402 2403 +) + +host_ssh() { + local host="$1" + shift + ssh "${SSH_OPTS[@]}" "root@${host}" "$@" } -# Ensure tarball exists (download to host or use cache) ensure_tarball() { - local path="$LOCAL_CACHE/$BESU_TAR" - if [ -f "$path" ]; then - log_ok "Using existing $path" - echo "$path" - return - fi - log_info "Downloading $DOWNLOAD_URL ..." 
- if $DRY_RUN; then - echo "" - return - fi - (cd "$LOCAL_CACHE" && curl -sSfL -o "$BESU_TAR" "$DOWNLOAD_URL") || { - log_err "Download failed" - return 1 - } - log_ok "Downloaded $path" - echo "$path" + local path="${LOCAL_CACHE}/${BESU_TAR}" + mkdir -p "$LOCAL_CACHE" + if [[ -f "$path" ]]; then + log_ok "Using existing $path" >&2 + printf '%s\n' "$path" + return 0 + fi + if $DRY_RUN; then + printf '%s\n' "$path" + return 0 + fi + log_info "Downloading ${DOWNLOAD_URL}" >&2 + curl -fsSL -o "$path" "$DOWNLOAD_URL" + log_ok "Downloaded $path" >&2 + printf '%s\n' "$path" +} + +detect_service() { + local host="$1" + local vmid="$2" + host_ssh "$host" "pct exec ${vmid} -- bash -lc 'systemctl list-units --type=service --no-legend 2>/dev/null | awk \"{print \\\$1}\" | grep -iE \"^besu-(validator|sentry|rpc|rpc-core)\\.service$|^besu\\.service$\" | head -1'" 2>/dev/null || true +} + +is_running() { + local host="$1" + local vmid="$2" + host_ssh "$host" "pct status ${vmid} 2>/dev/null | awk '{print \$2}'" 2>/dev/null | grep -q '^running$' +} + +prepare_host_tarball() { + local host="$1" + local local_path="$2" + local host_tmp="/tmp/${BESU_TAR}" + if $DRY_RUN; then + log_info " [dry-run] would copy ${BESU_TAR} to ${host}:${host_tmp}" + return 0 + fi + scp "${SSH_OPTS[@]}" "$local_path" "root@${host}:${host_tmp}" >/dev/null } upgrade_node() { - local vmid=$1 - local service="${NODE_SERVICES[$vmid]:-besu-rpc}" - local tarball_path="$2" + local host="$1" + local vmid="$2" + local service="$3" - if ! is_running "$vmid"; then - log_warn "VMID $vmid not running — skip" - return 0 + if ! 
is_running "$host" "$vmid"; then + log_warn "VMID ${vmid} @ ${host}: not running, skipping" + return 0 + fi + + if [[ -z "$service" ]]; then + log_warn "VMID ${vmid} @ ${host}: no Besu service detected, skipping" + return 0 + fi + + log_info "VMID ${vmid} @ ${host}: upgrading ${service} to Besu ${BESU_VERSION}" + + if $DRY_RUN; then + log_info " [dry-run] would install Java 21, extract ${BESU_TAR}, switch /opt/besu, restart ${service}" + return 0 + fi + + host_ssh "$host" "pct push ${vmid} /tmp/${BESU_TAR} /tmp/${BESU_TAR}" >/dev/null + + host_ssh "$host" "pct exec ${vmid} -- bash -lc ' + set -euo pipefail + if [[ ! -e /opt/besu ]]; then + fallback=\$(find /opt -maxdepth 1 -type d -name \"besu-*\" | sort -V | tail -1) + if [[ -n \"\${fallback:-}\" ]]; then + ln -sfn \"\$fallback\" /opt/besu + chown -h besu:besu /opt/besu 2>/dev/null || true + fi + elif [[ ! -L /opt/besu ]]; then + current_semver=\$(/opt/besu/bin/besu --version 2>/dev/null | grep -Eo \"[0-9]+\\.[0-9]+\\.[0-9]+\" | head -1) + current_version=\"besu-\${current_semver:-}\" + [[ -z \"\${current_version:-}\" ]] && current_version=besu-backup-pre-${BESU_VERSION} + if [[ ! -d \"/opt/\${current_version}\" ]]; then + mv /opt/besu \"/opt/\${current_version}\" + else + rm -rf /opt/besu + fi + ln -sfn \"/opt/\${current_version}\" /opt/besu + chown -h besu:besu /opt/besu 2>/dev/null || true fi - - log_info "VMID $vmid: upgrade to Besu $BESU_VERSION ($service) ..." 
- - if $DRY_RUN; then - log_info " [dry-run] would push $BESU_TAR and extract, switch /opt/besu, restart $service" - return 0 + java_major=\$(java -version 2>&1 | sed -n \"1s/.*version \\\"\\([0-9][0-9]*\\).*/\\1/p\") + if [[ -z \"\${java_major:-}\" || \"\$java_major\" -lt 21 ]]; then + export DEBIAN_FRONTEND=noninteractive + apt-get update -qq + apt-get install -y -qq openjdk-21-jre-headless || true + java_major=\$(java -version 2>&1 | sed -n \"1s/.*version \\\"\\([0-9][0-9]*\\).*/\\1/p\") + if [[ -z \"\${java_major:-}\" || \"\$java_major\" -lt 21 ]]; then + command -v curl >/dev/null 2>&1 || apt-get install -y -qq curl ca-certificates + tmp_jre=/tmp/java21-jre.tar.gz + curl -fsSL -o \"\$tmp_jre\" '${JAVA21_FALLBACK_URL}' + tar -tzf \"\$tmp_jre\" > /tmp/java21-jre.list + extracted_dir=\$(head -1 /tmp/java21-jre.list | cut -d/ -f1) + rm -f /tmp/java21-jre.list + tar -xzf \"\$tmp_jre\" -C /opt + rm -f \"\$tmp_jre\" + ln -sfn \"/opt/\${extracted_dir}\" /opt/java-21 + update-alternatives --install /usr/bin/java java /opt/java-21/bin/java 2100 + fi fi - - # Copy tarball into container - if ! ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new root@"$PROXMOX_HOST" \ - "pct push $vmid $tarball_path /tmp/$BESU_TAR" 2>/dev/null; then - log_err " Failed to push tarball to $vmid" - return 1 + config_file=\$(systemctl cat ${service} | sed -n \"s/.*--config-file=\\\\([^ ]*\\\\).*/\\\\1/p\" | tail -1) + if [[ -n \"\${config_file:-}\" && -f \"\$config_file\" ]]; then + find /etc/besu -maxdepth 1 -type f -name \"*.toml\" -print0 2>/dev/null | while IFS= read -r -d \"\" toml; do + sed -i \ + -e \"/^[[:space:]]*miner-enabled[[:space:]]*=.*/d\" \ + -e \"/^[[:space:]]*privacy-enabled[[:space:]]*=.*/d\" \ + \"\$toml\" + if grep -q \"^rpc-http-enabled=true\" \"\$toml\" && ! 
grep -q \"^rpc-http-max-active-connections=\" \"\$toml\"; then + tmp=\$(mktemp) + awk \"1; /^rpc-http-port=/{print \\\"rpc-http-max-active-connections=${RPC_HTTP_MAX_ACTIVE_CONNECTIONS}\\\"}\" \"\$toml\" > \"\$tmp\" + cat \"\$tmp\" > \"\$toml\" + rm -f \"\$tmp\" + fi + if grep -q \"^rpc-ws-enabled=true\" \"\$toml\" && ! grep -q \"^rpc-ws-max-active-connections=\" \"\$toml\"; then + tmp=\$(mktemp) + awk \"1; /^rpc-ws-port=/{print \\\"rpc-ws-max-active-connections=${RPC_WS_MAX_ACTIVE_CONNECTIONS}\\\"}\" \"\$toml\" > \"\$tmp\" + cat \"\$tmp\" > \"\$toml\" + rm -f \"\$tmp\" + fi + done + if ! grep -q \"^data-storage-format=\" \"\$config_file\"; then + tmp=\$(mktemp) + awk \"1; /^sync-mode=/{print \\\"data-storage-format=\\\\\\\"FOREST\\\\\\\"\\\"}\" \"\$config_file\" > \"\$tmp\" + cat \"\$tmp\" > \"\$config_file\" + rm -f \"\$tmp\" + fi fi - - # Extract, switch symlink, fix ownership, restart (each step via pct exec to avoid quoting issues) - ssh -o ConnectTimeout=60 -o StrictHostKeyChecking=accept-new root@"$PROXMOX_HOST" \ - "pct exec $vmid -- bash -c 'cd /opt && tar -xzf /tmp/$BESU_TAR && rm -f /tmp/$BESU_TAR'" || { - log_err " VMID $vmid: extract failed" - return 1 - } - ssh -o ConnectTimeout=10 root@"$PROXMOX_HOST" \ - "pct exec $vmid -- bash -c 'cd /opt && rm -f besu && ln -sf $BESU_DIR besu && chown -R besu:besu $BESU_DIR besu 2>/dev/null || true'" || true - ssh -o ConnectTimeout=15 root@"$PROXMOX_HOST" \ - "pct exec $vmid -- systemctl restart ${service}.service" || { - log_err " VMID $vmid: restart failed" - return 1 - } - sleep 3 - local active - active=$(ssh -o ConnectTimeout=5 root@"$PROXMOX_HOST" "pct exec $vmid -- systemctl is-active ${service}.service 2>/dev/null" || echo "unknown") - if [ "$active" = "active" ]; then - log_ok " VMID $vmid upgraded and $service active" - return 0 + cd /opt + if [[ ! 
-d ${BESU_DIR} ]]; then + tar -xzf /tmp/${BESU_TAR} -C /opt fi - log_err " VMID $vmid: service status after restart: $active" - return 1 + rm -f /tmp/${BESU_TAR} + ln -sfn ${BESU_DIR} /opt/besu + chown -h besu:besu /opt/besu 2>/dev/null || true + chown -R besu:besu ${BESU_DIR} /opt/besu-* 2>/dev/null || true + systemctl restart ${service} + '" || return 1 + + local active version + active="" + for _ in $(seq 1 24); do + active="$(host_ssh "$host" "pct exec ${vmid} -- systemctl is-active ${service}" 2>/dev/null || true)" + [[ "$active" == "active" ]] && break + sleep 5 + done + version="$(host_ssh "$host" "pct exec ${vmid} -- bash -lc '/opt/besu/bin/besu --version 2>/dev/null | grep -m1 \"besu/\" || true'" 2>/dev/null || true)" + if [[ "$active" == "active" ]]; then + log_ok " VMID ${vmid}: ${service} active (${version:-version unavailable})" + return 0 + fi + + log_err " VMID ${vmid}: ${service} state=${active:-unknown}" + host_ssh "$host" "pct exec ${vmid} -- journalctl -u ${service} -n 30 --no-pager" 2>/dev/null || true + return 1 } -# --- main --- -log_info "Upgrade Besu on all nodes to $BESU_VERSION (host: $PROXMOX_HOST)" -[ "$DRY_RUN" = true ] && log_warn "DRY RUN — no changes will be made" -echo "" +log_info "Upgrade Besu fleet to ${BESU_VERSION}" +$DRY_RUN && log_warn "DRY RUN: no changes will be made" +echo -# Check SSH -if ! ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new root@"$PROXMOX_HOST" "echo OK" &>/dev/null; then - log_err "Cannot SSH to $PROXMOX_HOST" - exit 1 -fi +TARBALL_PATH="$(ensure_tarball)" -tarball_path="" -if ! 
$DRY_RUN; then - tarball_path=$(ensure_tarball) || exit 1 - [ -z "$tarball_path" ] && exit 1 -fi +declare -A VMIDS_ON_HOST +for vmid in "${BESU_VMIDS[@]}"; do + host="${HOST_BY_VMID[$vmid]:-}" + [[ -n "$host" ]] || continue + VMIDS_ON_HOST[$host]+=" ${vmid}" +done PASS=0 FAIL=0 -VMIDS_SORTED=$(echo "${!NODE_SERVICES[@]}" | tr ' ' '\n' | sort -n) -for vmid in $VMIDS_SORTED; do - if upgrade_node "$vmid" "$tarball_path"; then - ((PASS++)) || true - else - ((FAIL++)) || true +SKIP=0 + +for host in "${!VMIDS_ON_HOST[@]}"; do + log_info "Host ${host}" + if ! host_ssh "$host" "echo OK" >/dev/null 2>&1; then + log_err " Cannot SSH to ${host}" + ((FAIL++)) || true + continue + fi + + prepare_host_tarball "$host" "$TARBALL_PATH" + + for vmid in ${VMIDS_ON_HOST[$host]}; do + service="$(detect_service "$host" "$vmid")" + if ! is_running "$host" "$vmid"; then + log_warn "VMID ${vmid} @ ${host}: not running, skipping" + ((SKIP++)) || true + continue fi - echo "" + if [[ -z "$service" ]]; then + log_warn "VMID ${vmid} @ ${host}: no Besu unit found, skipping" + ((SKIP++)) || true + continue + fi + if upgrade_node "$host" "$vmid" "$service"; then + ((PASS++)) || true + else + ((FAIL++)) || true + fi + echo + done + + if ! 
$DRY_RUN; then + host_ssh "$host" "rm -f /tmp/${BESU_TAR}" >/dev/null 2>&1 || true + fi done -echo "────────────────────────────────────────────────────────────" -log_info "Upgrade summary: $PASS succeeded, $FAIL failed" -echo "────────────────────────────────────────────────────────────" +echo "------------------------------------------------------------" +log_info "Upgrade summary: passed=${PASS} skipped=${SKIP} failed=${FAIL}" +echo "------------------------------------------------------------" -if [ "$FAIL" -gt 0 ]; then - exit 1 +if [[ "$FAIL" -gt 0 ]]; then + exit 1 fi -exit 0 diff --git a/scripts/verify/check-chain138-rpc-health.sh b/scripts/verify/check-chain138-rpc-health.sh index 846cb41..1f00b8b 100755 --- a/scripts/verify/check-chain138-rpc-health.sh +++ b/scripts/verify/check-chain138-rpc-health.sh @@ -122,28 +122,12 @@ check_supported_method() { return 1 } -check_expected_missing_method() { - local method="$1" - local params="${2:-[]}" - local response code message - response="$(rpc_request "$method" "$params" || printf '%s' '{"error":"curl"}')" - code="$(printf '%s' "$response" | jq -r '.error.code // empty' 2>/dev/null || true)" - message="$(printf '%s' "$response" | jq -r '.error.message // empty' 2>/dev/null || true)" - if [[ "$code" == "-32601" || "$message" == "Method not found" ]]; then - printf ' %-32s %s\n' "$method" "EXPECTED_MISSING" - return 0 - fi - printf ' %-32s %s\n' "$method" "UNEXPECTED" - ((fail++)) || true - return 1 -} - check_supported_method "eth_chainId" check_supported_method "eth_gasPrice" +check_supported_method "eth_maxPriorityFeePerGas" check_supported_method "eth_feeHistory" "[\"0x1\", \"latest\", []]" check_supported_method "trace_block" "[\"0x1\"]" check_supported_method "trace_replayBlockTransactions" "[\"0x1\", [\"trace\"]]" -check_expected_missing_method "eth_maxPriorityFeePerGas" if [[ "$fail" -eq 0 ]]; then echo "OK: node health and public RPC capability checks passed" diff --git 
a/scripts/verify/print-caliper-chain138-stub.sh b/scripts/verify/print-caliper-chain138-stub.sh new file mode 100755 index 0000000..89a5324 --- /dev/null +++ b/scripts/verify/print-caliper-chain138-stub.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Print Caliper integration hints for Chain 138 (no network I/O). +# See docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md + +set -euo pipefail +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +echo "Caliper is not bundled in this repo." +echo "Read: $ROOT/docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md" +echo "" +echo "Suggested SUT URL for benchmarks (lab): \${RPC_URL_138:-http://192.168.11.211:8545}" +echo "Chain ID: 138 (verify with eth_chainId)." diff --git a/scripts/verify/run-dbis-phase3-e2e-simulation.sh b/scripts/verify/run-dbis-phase3-e2e-simulation.sh new file mode 100755 index 0000000..15aa314 --- /dev/null +++ b/scripts/verify/run-dbis-phase3-e2e-simulation.sh @@ -0,0 +1,72 @@ +#!/usr/bin/env bash +# DBIS Phase 3 — liveness / availability wrapper: Besu RPC liveness + optional FireFly HTTP + optional full RPC health. +# This does NOT execute Indy issuance, Aries verification, Fabric chaincode, or cross-chain business workflow steps. +# +# Usage: bash scripts/verify/run-dbis-phase3-e2e-simulation.sh +# Env: RPC_URL_138 (default http://192.168.11.211:8545) +# FIREFLY_URL (default http://192.168.11.35:5000) +# RUN_CHAIN138_RPC_HEALTH=1 to run check-chain138-rpc-health.sh (slower) + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +# shellcheck source=/dev/null +source "$PROJECT_ROOT/config/ip-addresses.conf" 2>/dev/null || true + +RPC_URL="${RPC_URL_138:-http://${IP_BESU_RPC_CORE_1:-192.168.11.211}:8545}" +FIREFLY_URL="${FIREFLY_URL:-http://192.168.11.35:5000}" + +fail=0 +echo "=== DBIS Phase 3 liveness wrapper (partial) ===" +echo "RPC: $RPC_URL" +echo "" + +if command -v curl &>/dev/null; then + echo "--- Besu eth_chainId / eth_blockNumber ---" + if ! out=$(curl -sS --connect-timeout 5 -X POST -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_chainId","params":[],"id":1}' "$RPC_URL"); then + echo "[FAIL] curl chainId" + fail=1 + else + echo "$out" + fi + if ! out=$(curl -sS --connect-timeout 5 -X POST -H 'Content-Type: application/json' \ + -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' "$RPC_URL"); then + echo "[FAIL] curl blockNumber" + fail=1 + else + echo "$out" + fi +else + echo "[SKIP] curl not installed" + fail=1 +fi + +echo "" +echo "--- FireFly HTTP (optional) ---" +if command -v curl &>/dev/null; then + code=$(curl -sS -o /dev/null -w '%{http_code}' --connect-timeout 4 "$FIREFLY_URL/api/v1/status" || true) + if [[ "$code" =~ ^(200|401|403)$ ]]; then + echo "[OK] $FIREFLY_URL/api/v1/status HTTP $code" + else + echo "[WARN] $FIREFLY_URL/api/v1/status HTTP ${code:-000} (FireFly may be down or path differs)" + fi +else + echo "[SKIP] curl not installed" +fi + +if [[ "${RUN_CHAIN138_RPC_HEALTH:-}" == "1" ]]; then + echo "" + echo "--- check-chain138-rpc-health.sh ---" + bash "$PROJECT_ROOT/scripts/verify/check-chain138-rpc-health.sh" || fail=1 +fi + +echo "" +echo "--- Manual follow-ups (Section 18) ---" +echo "This script proves only liveness / availability for the automated checks above." 
+echo "Indy 6400 / Fabric 6000 / CCIP relay on r630-01: see docs/03-deployment/DBIS_PHASE3_E2E_PRODUCTION_SIMULATION_RUNBOOK.md" +echo "Caliper: docs/03-deployment/CALIPER_CHAIN138_PERF_HOOK.md" +echo "" + +exit "$fail" diff --git a/scripts/verify/run-phase1-discovery.sh b/scripts/verify/run-phase1-discovery.sh new file mode 100755 index 0000000..b3bc150 --- /dev/null +++ b/scripts/verify/run-phase1-discovery.sh @@ -0,0 +1,218 @@ +#!/usr/bin/env bash +# Phase 1 — Reality mapping (read-only): compose Proxmox/Besu audits and optional +# Hyperledger CT probes into a timestamped report under reports/phase1-discovery/. +# +# Usage (repo root, LAN + SSH to Proxmox recommended): +# bash scripts/verify/run-phase1-discovery.sh +# HYPERLEDGER_PROBE=1 bash scripts/verify/run-phase1-discovery.sh # SSH pct exec smoke checks on r630-02 +# +# Env: PROXMOX_HOSTS, SSH_USER, SSH_OPTS (same as audit-proxmox-operational-template.sh) +# HYPERLEDGER_PROBE=1 to run optional Fabric/Indy/FireFly container checks (requires SSH to r630-02) + +set -uo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)" +# shellcheck source=/dev/null +source "$PROJECT_ROOT/config/ip-addresses.conf" 2>/dev/null || true + +REPORT_DIR="${REPORT_DIR:-$PROJECT_ROOT/reports/phase1-discovery}" +STAMP="$(date -u +%Y%m%d_%H%M%S)" +MD="$REPORT_DIR/phase1-discovery-${STAMP}.md" +LOG="$REPORT_DIR/phase1-discovery-${STAMP}.log" +mkdir -p "$REPORT_DIR" + +SSH_USER="${SSH_USER:-root}" +SSH_OPTS="${SSH_OPTS:--o BatchMode=yes -o ConnectTimeout=6 -o StrictHostKeyChecking=accept-new}" +R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}" + +append_cmd() { + local title="$1" + local severity="${2:-info}" + shift 2 || true + local rc=0 + local tmp + tmp="$(mktemp)" + "$@" >"$tmp" 2>&1 + rc=$? 
+ { + echo "" + echo "## $title" + echo "" + echo '```text' + cat "$tmp" + if (( rc != 0 )); then + echo "[exit $rc]" + fi + echo '```' + } | tee -a "$MD" >>"$LOG" + rm -f "$tmp" + if (( rc != 0 )) && [[ "$severity" == "critical" ]]; then + PHASE1_CRITICAL_FAILURES+=("$title (exit $rc)") + fi +} + +PHASE1_CRITICAL_FAILURES=() + +{ + echo "# Phase 1 discovery report" + echo "" + echo "**Generated (UTC):** $(date -u +%Y-%m-%dT%H:%M:%SZ)" + echo "" + echo "**Runbook:** [docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](../../docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md)" + echo "" + echo "**Doctrine:** [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) (Sections 3, 19–20)" + echo "" + echo "## Dependency graph (logical)" + echo "" + echo "Same diagram as the runbook; edges reflect documented traffic flow, not live packet capture." + echo "" + cat <<'MERMAID' +```mermaid +flowchart TB + subgraph edge [EdgeIngress] + CF[Cloudflare_DNS] + NPM[NPMplus_LXC] + end + subgraph besu [Chain138_Besu] + RPCpub[RPC_public_2201] + RPCcore[RPC_core_2101] + Val[Validators_1000_1004] + Sen[Sentries_1500_1508] + end + subgraph observe [Observability] + BS[Blockscout_5000] + end + subgraph relay [CrossChain] + CCIP[CCIP_relay_r63001_host] + end + subgraph dlt [Hyperledger_optional] + FF[FireFly_6200_6201] + Fab[Fabric_6000_plus] + Indy[Indy_6400_plus] + end + CF --> NPM + NPM --> RPCpub + NPM --> RPCcore + NPM --> BS + RPCpub --> Sen + RPCcore --> Sen + Sen --> Val + CCIP --> RPCpub + FF --> Fab + FF --> Indy +``` +MERMAID +} >"$MD" +touch "$LOG" + +append_cmd "Proxmox template vs live VMID audit" critical bash "$PROJECT_ROOT/scripts/verify/audit-proxmox-operational-template.sh" + +PROXMOX_HOSTS="${PROXMOX_HOSTS:-${PROXMOX_HOST_ML110:-192.168.11.10} ${PROXMOX_HOST_R630_01:-192.168.11.11} $R630_02}" +append_cmd "Proxmox cluster status (pvecm) per host" critical bash -c " +fail=0 +for h in $PROXMOX_HOSTS; do + echo '=== '"\$h"' ===' + ssh $SSH_OPTS 
${SSH_USER}@\"\$h\" 'pvecm status 2>&1' || fail=1 + echo '' +done +exit \$fail +" + +append_cmd "Proxmox storage (pvesm status) per host" critical bash -c " +fail=0 +for h in $PROXMOX_HOSTS; do + echo '=== '"\$h"' ===' + ssh $SSH_OPTS ${SSH_USER}@\"\$h\" 'pvesm status 2>&1 | head -80' || fail=1 + echo '' +done +exit \$fail +" + +append_cmd "Live pct/qm lists per host" critical bash -c " +fail=0 +for h in $PROXMOX_HOSTS; do + echo '=== '"\$h"' ===' + ssh $SSH_OPTS ${SSH_USER}@\"\$h\" 'echo PCT:; pct list 2>&1; echo VM:; qm list 2>&1' || fail=1 + echo '' +done +exit \$fail +" + +if command -v curl &>/dev/null; then + append_cmd "Chain 138 RPC quick probe (core, LAN)" critical bash -c " + curl -sS --connect-timeout 4 -X POST -H 'Content-Type: application/json' \ + --data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_chainId\",\"params\":[],\"id\":1}' \ + \"http://${IP_BESU_RPC_CORE_1:-192.168.11.211}:8545\" || echo 'curl failed' + " +fi + +append_cmd "Besu RPC health script (may fail off-LAN)" critical bash -c " + bash \"$PROJECT_ROOT/scripts/verify/check-chain138-rpc-health.sh\" +" + +append_cmd "Besu enodes / IPs verify (may fail off-LAN)" critical bash -c " + bash \"$PROJECT_ROOT/scripts/verify/verify-besu-enodes-and-ips.sh\" +" + +if [[ "${HYPERLEDGER_PROBE:-}" == "1" ]]; then + append_cmd "Hyperledger CT smoke (r630-02; pct exec)" critical bash -c " + ssh $SSH_OPTS ${SSH_USER}@$R630_02 ' + for id in 6200 6201 6000 6001 6002 6400 6401 6402; do + echo \"=== VMID \$id status ===\" + pct status \$id 2>&1 || true + if pct status \$id 2>/dev/null | grep -q running; then + pct exec \$id -- bash -lc \"command -v docker >/dev/null && docker ps --format 'table {{.Names}}\t{{.Status}}' 2>/dev/null | head -10 || true; command -v systemctl >/dev/null && systemctl list-units --type=service --state=running --no-pager 2>/dev/null | head -20 || true; ss -ltnp 2>/dev/null | head -20 || true\" 2>&1 || echo \"[exec failed]\" + fi + echo \"\" + done + ' + " +else + { + echo "" + echo "## 
Hyperledger CT smoke (skipped)" + echo "" + echo "Set \`HYPERLEDGER_PROBE=1\` to SSH to r630-02 and run \`pct status/exec\` on 6200, 6201, 6000, 6001, 6002, 6400, 6401, 6402." + echo "" + } >>"$MD" +fi + +{ + echo "" + echo "## Configuration snapshot pointers (no secrets in repo)" + echo "" + echo "- \`config/proxmox-operational-template.json\`" + echo "- \`config/ip-addresses.conf\`" + echo "- \`docs/04-configuration/ALL_VMIDS_ENDPOINTS.md\`" + echo "" + echo "## Next steps" + echo "" + echo "1. Reconcile **Entity owner** / **Region** in [DBIS_NODE_ROLE_MATRIX.md](../../docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md)." + echo "2. If ML110 row shows Proxmox + workloads, update [PHYSICAL_HARDWARE_INVENTORY.md](../../docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md) vs [NETWORK_CONFIGURATION_MASTER.md](../../docs/11-references/NETWORK_CONFIGURATION_MASTER.md)." + echo "" + if ((${#PHASE1_CRITICAL_FAILURES[@]} > 0)); then + echo "## Critical failure summary" + echo "" + for failure in "${PHASE1_CRITICAL_FAILURES[@]}"; do + echo "- $failure" + done + echo "" + echo "This report is complete as evidence capture, but the discovery run is **not** a pass. Re-run from LAN with working SSH/RPC access until the critical failures clear." + else + echo "## Critical failure summary" + echo "" + echo "- none" + echo "" + echo "All critical discovery checks completed successfully for this run." + fi + echo "" +} >>"$MD" + +echo "Wrote $MD" +echo "Full log mirror: $LOG" +ls -la "$MD" "$LOG" + +if ((${#PHASE1_CRITICAL_FAILURES[@]} > 0)); then + exit 1 +fi diff --git a/smom-dbis-138 b/smom-dbis-138 index 1771db2..2f58965 160000 --- a/smom-dbis-138 +++ b/smom-dbis-138 @@ -1 +1 @@ -Subproject commit 1771db2190343b223888e14f8155217a10ea3f4a +Subproject commit 2f58965f39e675b10095563b9ff0f3649b2f8d9e