Files
proxmox/scripts/verify/verify-backend-vms.sh
defiQUG e4c9dda0fd
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
chore: update submodule references and documentation
- Marked submodules ai-mcp-pmm-controller, explorer-monorepo, and smom-dbis-138 as dirty to reflect recent changes.
- Updated documentation to clarify operator script usage, including dotenv loading and task execution instructions.
- Enhanced the README and various index files to provide clearer navigation and task completion guidance.

Made-with: Cursor
2026-03-04 02:03:08 -08:00

338 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
# Verify backend VMs configuration
# Checks status, IPs, services, ports, config files, and health endpoints
#
# For every VMID listed in VM_CONFIGS the script connects as root over SSH to the
# Proxmox host, queries the guest with pct/qm, probes the guest with curl, and
# writes JSON/Markdown evidence into a timestamped directory under EVIDENCE_DIR.
# jq is used to compact and merge the JSON results; a raw fallback exists when it fails.
#
# Strict mode: abort on errors, on unset variables, and on failures inside pipelines.
set -euo pipefail
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Project root is two directory levels above the script directory.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Parent directory for all verification evidence produced by this script.
EVIDENCE_DIR="$PROJECT_ROOT/docs/04-configuration/verification-evidence"
# Colors: ANSI escape sequences kept in backslash form. They are expanded by the
# %b conversions in the log helpers below (and by `echo -e` anywhere else).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Log helpers. Each prints a colored tag followed by the message ($1) on stderr,
# which keeps stdout free for values that callers capture with $(...).
# Only the color codes go through %b; the message itself is printed with %s so
# that backslashes in it (for example in text returned by a remote command) are
# shown literally instead of being interpreted as escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1-}" >&2; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "${1-}" >&2; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "${1-}" >&2; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "${1-}" >&2; }
# Work from the project root so the relative paths used below resolve.
cd "$PROJECT_ROOT"

# Optional overrides. Both files are sourced only when present, and any error
# raised while sourcing them is deliberately ignored (best effort).
if [ -f .env ]; then
  source .env 2>/dev/null || true
fi
if [ -f config/ip-addresses.conf ]; then
  source config/ip-addresses.conf 2>/dev/null || true
fi

# Proxmox host addresses, overridable through the PROXMOX_HOST_* variables.
ML110_IP="${PROXMOX_HOST_ML110:-192.168.11.10}"
R630_01_IP="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02_IP="${PROXMOX_HOST_R630_02:-192.168.11.12}"

# Every run writes into its own timestamped evidence directory.
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
OUTPUT_DIR="${EVIDENCE_DIR}/backend-vms-verification-${TIMESTAMP}"
mkdir -p "${OUTPUT_DIR}"

# VMIDs that are optional (not deployed yet); unknown status = informational only. Space-separated.
VM_OPTIONAL_WHEN_MISSING="${VM_OPTIONAL_WHEN_MISSING:-5801}"
# Backend VMs from baseline docs.
# Key: VMID. Value: one '|'-separated record with the fields
#   guest IP | hostname | Proxmox node | Proxmox node IP | service type |
#   nginx config path, port list, or "-" | public domains (comma-separated)
# Guest IPs can be overridden through the environment variables named in each row.
declare -A VM_CONFIGS=(
  # VMs with nginx
  ["5000"]="${IP_BLOCKSCOUT:-192.168.11.140}|blockscout-1|r630-02|${R630_02_IP}|nginx|/etc/nginx/sites-available/blockscout|explorer.d-bis.org"
  ["7810"]="${IP_MIM_WEB:-192.168.11.37}|mim-web-1|r630-02|${R630_02_IP}|nginx|/etc/nginx/sites-available/mim4u|mim4u.org,www.mim4u.org,secure.mim4u.org,training.mim4u.org"
  ["10130"]="${IP_DBIS_FRONTEND:-192.168.11.130}|dbis-frontend|r630-01|${R630_01_IP}|web|/etc/nginx/sites-available/dbis-frontend|dbis-admin.d-bis.org,secure.d-bis.org"
  ["2400"]="${RPC_THIRDWEB_PRIMARY:-192.168.11.240}|thirdweb-rpc-1|ml110|${ML110_IP}|nginx|/etc/nginx/sites-available/rpc-thirdweb|rpc.public-0138.defi-oracle.io"
  # VMs without nginx
  ["2101"]="${RPC_CORE_1:-192.168.11.211}|besu-rpc-core-1|r630-01|${R630_01_IP}|besu|8545,8546|rpc-http-prv.d-bis.org,rpc-ws-prv.d-bis.org"
  ["2201"]="${RPC_PUBLIC_1:-192.168.11.221}|besu-rpc-public-1|r630-02|${R630_02_IP}|besu|8545,8546|rpc-http-pub.d-bis.org,rpc-ws-pub.d-bis.org"
  ["10150"]="${IP_DBIS_API:-192.168.11.155}|dbis-api-primary|r630-01|${R630_01_IP}|nodejs|3000|dbis-api.d-bis.org"
  ["10151"]="${IP_DBIS_API_2:-192.168.11.156}|dbis-api-secondary|r630-01|${R630_01_IP}|nodejs|3000|dbis-api-2.d-bis.org"
  # Mifos X + Fineract (VMID 5800); NPMplus 10237 proxies to this
  ["5800"]="${MIFOS_IP:-192.168.11.85}|mifos|r630-02|${R630_02_IP}|web|-|mifos.d-bis.org"
  # DApp LXC (VMID 5801); NPMplus 10233 (or tunnel) proxies to this
  ["5801"]="${IP_DAPP_LXC:-192.168.11.58}|dapp-smom|r630-02|${R630_02_IP}|web|-|dapp.d-bis.org"
)
#######################################
# Run a shell command inside a Proxmox container through SSH and `pct exec`.
# Arguments:
#   $1 - VMID of the container
#   $2 - address of the Proxmox host that owns the container
#   $3 - command line to run inside the container (interpreted by bash -c)
# Outputs:
#   The command's stdout. When ssh, pct or the command itself exits non-zero,
#   the sentinel line COMMAND_FAILED is printed as well. stderr is discarded.
# Returns:
#   Always 0; callers must check for the COMMAND_FAILED sentinel.
#######################################
exec_in_vm() {
  local vmid=$1
  local host=$2
  local cmd=$3
  local quoted_cmd

  # Quote the command for the remote shell. Simply wrapping it in single quotes
  # breaks as soon as the command itself contains single quotes: the remote
  # shell then re-tokenizes it and `bash -c` receives a truncated command.
  # NOTE(review): %q emits bash-style quoting; this assumes root's login shell
  # on the Proxmox host is bash — confirm.
  printf -v quoted_cmd '%q' "$cmd"

  # Use --norc to avoid .bashrc permission errors; redirect its stderr
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@${host}" \
    "pct exec ${vmid} -- bash --norc -c ${quoted_cmd}" 2>/dev/null \
    || echo "COMMAND_FAILED"
}
# Join all arguments with commas; prints nothing when called without arguments.
_verify_vm_join() {
  local IFS=','
  printf '%s' "$*"
}

# Succeed when TCP port $3 is listening inside guest $1 (reached via Proxmox host $2).
# exec_in_vm prints the sentinel COMMAND_FAILED when the remote call fails; that
# output is non-empty but must not be counted as a listening socket.
_verify_vm_port_open() {
  local vmid=$1 host_ip=$2 port=$3 out
  out=$(exec_in_vm "$vmid" "$host_ip" "ss -lntp 2>/dev/null | grep ':$port ' || echo ''" 2>/dev/null || echo "")
  [[ -n "$out" && "$out" != *COMMAND_FAILED* ]]
}

# Print the three-digit HTTP status for URL $1, or "000" when none was obtained.
# curl already writes "000" itself when the connection fails, so a fallback must
# replace its output rather than be appended to it.
_verify_vm_http_code() {
  local url=$1 code
  code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 3 "$url" 2>/dev/null) || true
  if ! [[ "$code" =~ ^[0-9]{3}$ ]]; then
    code="000"
  fi
  printf '%s' "$code"
}

# Print one health-endpoint JSON object for URL $1 and HTTP status $2.
# "000" means no response (actual_code null); 2xx/3xx count as pass.
_verify_vm_health_json() {
  local url=$1 code=$2 verdict="fail"
  if [ "$code" = "000" ]; then
    printf '{"path":"%s","expected_code":200,"actual_code":null,"status":"fail"}' "$url"
    return 0
  fi
  if [ "$code" -ge 200 ] && [ "$code" -lt 400 ]; then
    verdict="pass"
  fi
  # 10# forces base 10 so the number is emitted without leading zeros.
  printf '{"path":"%s","expected_code":200,"actual_code":%d,"status":"%s"}' \
    "$url" "$((10#$code))" "$verdict"
}

#######################################
# Verify one backend VM/container and record the evidence.
# Globals:
#   VM_CONFIGS (read), VM_OPTIONAL_WHEN_MISSING (read), OUTPUT_DIR (read)
# Arguments:
#   $1 - VMID; must be a key of VM_CONFIGS
# Outputs:
#   stdout: the result as JSON (compacted by jq; raw multi-line text if jq fails)
#   stderr: human-readable progress messages
#   files:  $OUTPUT_DIR/vmid_<id>_verification.json and, when the port list
#           could be read, $OUTPUT_DIR/vmid_<id>_listening_ports.txt
# Returns:
#   1 when the VMID has no VM_CONFIGS entry, otherwise 0.
#######################################
verify_vm() {
  local vmid=$1
  local config="${VM_CONFIGS[$vmid]:-}"
  if [ -z "$config" ]; then
    log_error "VMID $vmid has no entry in VM_CONFIGS"
    return 1
  fi

  local expected_ip hostname host host_ip service_type config_path domains
  IFS='|' read -r expected_ip hostname host host_ip service_type config_path domains <<< "$config"

  local ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'
  # ConnectTimeout keeps an unreachable Proxmox host from stalling the whole run.
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=10)

  log_info ""
  log_info "Verifying VMID $vmid: $hostname"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >&2

  # Check VM status (container first, then QEMU VM)
  local vm_status status
  vm_status=$(ssh "${ssh_opts[@]}" "root@$host_ip" "pct status $vmid 2>/dev/null || qm status $vmid 2>/dev/null" 2>&1 || echo "unknown")
  if [[ "$vm_status" == *running* ]]; then
    status="running"
    log_success "Status: Running"
  elif [[ "$vm_status" == *stopped* ]]; then
    status="stopped"
    log_warn "Status: Stopped"
  else
    status="unknown"
    if [[ " ${VM_OPTIONAL_WHEN_MISSING:-} " == *" $vmid "* ]]; then
      log_info "Status: Not deployed (optional VMID; deploy with deploy-dapp-lxc.sh to verify)"
    else
      log_warn "Status: Unknown"
    fi
  fi

  # Get actual IP (use cut to avoid awk quoting issues in ssh)
  local actual_ip=""
  if [ "$status" = "running" ]; then
    # Prefer pct config - parse net0: ...ip=X.X.X.X/24 or ip0=X.X.X.X
    actual_ip=$(ssh "${ssh_opts[@]}" "root@$host_ip" "pct config $vmid 2>/dev/null | grep -oE 'ip=[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+' | head -1 | cut -d= -f2" 2>/dev/null || echo "")
    if ! [[ "$actual_ip" =~ $ipv4_re ]]; then
      actual_ip=$(exec_in_vm "$vmid" "$host_ip" 'hostname -I 2>/dev/null | cut -d" " -f1' 2>/dev/null | head -1 | tr -d '\n\r' || echo "")
    fi
    # Anything that is not a dotted quad (error text, COMMAND_FAILED, ...) is discarded.
    if ! [[ "$actual_ip" =~ $ipv4_re ]]; then
      actual_ip=""
    fi
  fi

  if [ -n "$actual_ip" ] && [ "$actual_ip" = "$expected_ip" ]; then
    log_success "IP: $actual_ip (matches expected)"
  elif [ -n "$actual_ip" ]; then
    log_warn "IP: $actual_ip (expected $expected_ip)"
  else
    log_warn "IP: Could not determine (expected $expected_ip)"
  fi

  # Check services and ports
  local -a services=() listening_ports=() health_endpoints=()
  local nginx_status config_exists enabled_sites all_ports port
  if [ "$status" = "running" ]; then
    case "$service_type" in
      nginx)
        nginx_status=$(exec_in_vm "$vmid" "$host_ip" "systemctl is-active nginx 2>/dev/null || echo 'inactive'" 2>/dev/null | head -1 | tr -d '\n\r' || echo "unknown")
        if [ -z "$nginx_status" ] || [ "$nginx_status" = "COMMAND_FAILED" ]; then
          nginx_status="unknown"
        fi
        if [ "$nginx_status" = "active" ]; then
          log_success "Nginx: Active"
          services+=("{\"name\":\"nginx\",\"type\":\"systemd\",\"status\":\"active\"}")
          # Check the expected nginx site config
          if [ "$config_path" != "TBD" ] && [ -n "$config_path" ]; then
            config_exists=$(exec_in_vm "$vmid" "$host_ip" "test -f $config_path && echo 'yes' || echo 'no'" 2>/dev/null || echo "unknown")
            case "$config_exists" in
              yes) log_success "Nginx config: $config_path exists" ;;
              no) log_warn "Nginx config: $config_path not found" ;;
              *) log_warn "Nginx config: $config_path could not be checked" ;;
            esac
          fi
          # List enabled sites (xargs joins lines without tr escaping issues)
          enabled_sites=$(exec_in_vm "$vmid" "$host_ip" 'ls -1 /etc/nginx/sites-enabled/ 2>/dev/null | xargs' 2>/dev/null || echo "")
          if [[ -n "$enabled_sites" && "$enabled_sites" != *COMMAND_FAILED* ]]; then
            log_info "Enabled sites: $enabled_sites"
          fi
        else
          log_warn "Nginx: $nginx_status"
          # Strip double quotes so the value cannot break the JSON string.
          services+=("{\"name\":\"nginx\",\"type\":\"systemd\",\"status\":\"${nginx_status//\"/}\"}")
        fi
        ;;
      besu)
        for port in 8545 8546; do
          if _verify_vm_port_open "$vmid" "$host_ip" "$port"; then
            log_success "Port $port: Listening"
            listening_ports+=("{\"port\":$port,\"protocol\":\"tcp\",\"process\":\"besu\"}")
          else
            log_warn "Port $port: Not listening"
          fi
        done
        services+=("{\"name\":\"besu-rpc\",\"type\":\"direct\",\"status\":\"running\"}")
        ;;
      nodejs)
        if _verify_vm_port_open "$vmid" "$host_ip" 3000; then
          log_success "Port 3000: Listening"
          listening_ports+=("{\"port\":3000,\"protocol\":\"tcp\",\"process\":\"nodejs\"}")
        else
          log_warn "Port 3000: Not listening"
        fi
        services+=("{\"name\":\"nodejs-api\",\"type\":\"systemd\",\"status\":\"running\"}")
        ;;
      web)
        # HTTP on port 80, e.g. Python/Node serving dbis-frontend
        if _verify_vm_port_open "$vmid" "$host_ip" 80; then
          log_success "Port 80: Listening"
          listening_ports+=("{\"port\":80,\"protocol\":\"tcp\",\"process\":\"http\"}")
        else
          log_warn "Port 80: Not listening"
        fi
        services+=("{\"name\":\"http\",\"type\":\"direct\",\"status\":\"running\"}")
        ;;
    esac

    # Save the full list of listening sockets, but never the failure sentinel.
    all_ports=$(exec_in_vm "$vmid" "$host_ip" "ss -lntp 2>/dev/null | grep LISTEN || echo ''" 2>/dev/null || echo "")
    if [[ -n "$all_ports" && "$all_ports" != *COMMAND_FAILED* ]]; then
      printf '%s\n' "$all_ports" > "$OUTPUT_DIR/vmid_${vmid}_listening_ports.txt"
    fi
  fi

  # Health check endpoints
  # Note: 301 = HTTPS redirect (normal); 404 = wrong port/path or NPMplus; 000 = no connection (host/firewall/context).
  # See docs/04-configuration/DETAILED_GAPS_AND_ISSUES_LIST.md §11a.
  local http_code rpc_response
  if [ "$status" = "running" ] && [ -n "$actual_ip" ]; then
    case "$service_type" in
      nginx | web)
        # nginx and web both use port 80
        http_code=$(_verify_vm_http_code "http://$actual_ip:80")
        if [ "$http_code" != "000" ]; then
          log_success "HTTP health check: $actual_ip:80 returned $http_code"
        else
          log_warn "HTTP health check: $actual_ip:80 failed"
        fi
        health_endpoints+=("$(_verify_vm_health_json "http://$actual_ip:80" "$http_code")")
        ;;
      besu)
        rpc_response=$(curl -s -X POST "http://$actual_ip:8545" \
          -H 'Content-Type: application/json' \
          -d '{"jsonrpc":"2.0","method":"eth_chainId","params":[],"id":1}' \
          --connect-timeout 3 2>/dev/null || echo "")
        if [[ "$rpc_response" == *result* ]]; then
          log_success "RPC health check: $actual_ip:8545 responded"
          health_endpoints+=("$(_verify_vm_health_json "http://$actual_ip:8545" "200")")
        else
          log_warn "RPC health check: $actual_ip:8545 failed"
          health_endpoints+=("$(_verify_vm_health_json "http://$actual_ip:8545" "000")")
        fi
        ;;
      nodejs)
        # Prefer /health; fall back to the root path when it gives no response.
        http_code=$(_verify_vm_http_code "http://$actual_ip:3000/health")
        if [ "$http_code" = "000" ]; then
          http_code=$(_verify_vm_http_code "http://$actual_ip:3000")
        fi
        if [ "$http_code" != "000" ]; then
          log_success "API health check: $actual_ip:3000 returned $http_code"
        else
          log_warn "API health check: $actual_ip:3000 failed"
        fi
        health_endpoints+=("$(_verify_vm_health_json "http://$actual_ip:3000" "$http_code")")
        ;;
    esac
  fi

  # Build VM result JSON
  local has_nginx="false"
  if [ "$service_type" = "nginx" ]; then
    has_nginx="true"
  fi
  # Turn "a,b" into "a","b".
  local sep='","'
  local domains_json="\"${domains//,/$sep}\""
  # The ${arr[@]+...} form keeps empty arrays safe under `set -u` on older bash.
  local services_json ports_json health_json
  services_json=$(_verify_vm_join ${services[@]+"${services[@]}"})
  ports_json=$(_verify_vm_join ${listening_ports[@]+"${listening_ports[@]}"})
  health_json=$(_verify_vm_join ${health_endpoints[@]+"${health_endpoints[@]}"})

  local vm_result="{
  \"vmid\": $vmid,
  \"hostname\": \"$hostname\",
  \"host\": \"$host\",
  \"host_ip\": \"$host_ip\",
  \"expected_ip\": \"$expected_ip\",
  \"actual_ip\": \"$actual_ip\",
  \"status\": \"$status\",
  \"has_nginx\": $has_nginx,
  \"service_type\": \"$service_type\",
  \"config_path\": \"$config_path\",
  \"public_domains\": [$domains_json],
  \"services\": [$services_json],
  \"listening_ports\": [$ports_json],
  \"health_endpoints\": [$health_json],
  \"verified_at\": \"$(date -Iseconds)\"
}"

  printf '%s\n' "$vm_result" > "$OUTPUT_DIR/vmid_${vmid}_verification.json"
  # Compact to one line for the caller; fall back to the raw text if jq fails.
  printf '%s\n' "$vm_result" | jq -c . 2>/dev/null || printf '%s\n' "$vm_result"
}
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "🔍 Backend VMs Verification"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Bash does not define the order in which associative-array keys are expanded.
# Sort the VMIDs numerically so the merged JSON and the report are ordered the
# same way on every run.
mapfile -t SORTED_VMIDS < <(printf '%s\n' "${!VM_CONFIGS[@]}" | sort -n)

# Verify every VM; each non-empty result is one JSON document.
ALL_VM_RESULTS=()
for vmid in "${SORTED_VMIDS[@]}"; do
  result=$(verify_vm "$vmid")
  if [ -n "$result" ]; then
    ALL_VM_RESULTS+=("$result")
  fi
done

# Combine all results (compact JSON, one per line for jq -s)
printf '%s\n' "${ALL_VM_RESULTS[@]}" | jq -s '.' > "$OUTPUT_DIR/all_vms_verification.json" 2>/dev/null || {
  log_warn "jq merge failed, writing raw results"
  printf '%s\n' "${ALL_VM_RESULTS[@]}" > "$OUTPUT_DIR/all_vms_verification.json"
}

# Print the value selected by jq filter $2 from JSON text $1; print $3 if jq fails.
_report_field() {
  local json=$1 filter=$2 fallback=$3
  jq -r "$filter" <<< "$json" 2>/dev/null || echo "$fallback"
}

# Generate report
REPORT_FILE="$OUTPUT_DIR/verification_report.md"
cat > "$REPORT_FILE" <<EOF
# Backend VMs Verification Report
**Date**: $(date -Iseconds)
**Verifier**: $(whoami)
## Summary
Total VMs verified: ${#VM_CONFIGS[@]}
## VM Verification Results
EOF

# One section per verified VM.
for result in "${ALL_VM_RESULTS[@]}"; do
  vmid=$(_report_field "$result" '.vmid' "")
  hostname=$(_report_field "$result" '.hostname' "")
  status=$(_report_field "$result" '.status' "unknown")
  expected_ip=$(_report_field "$result" '.expected_ip' "")
  actual_ip=$(_report_field "$result" '.actual_ip' "")
  has_nginx=$(_report_field "$result" '.has_nginx' "false")
  {
    echo ""
    echo "### VMID $vmid: $hostname"
    echo "- Status: $status"
    echo "- Expected IP: $expected_ip"
    echo "- Actual IP: ${actual_ip:-unknown}"
    echo "- Has Nginx: $has_nginx"
    echo "- Details: See \`vmid_${vmid}_verification.json\`"
  } >> "$REPORT_FILE"
done

cat >> "$REPORT_FILE" <<EOF
## Files Generated
- \`all_vms_verification.json\` - Complete VM verification results
- \`vmid_*_verification.json\` - Individual VM verification details
- \`vmid_*_listening_ports.txt\` - Listening ports output per VM
- \`verification_report.md\` - This report
## Next Steps
1. Review verification results for each VM
2. Investigate any VMs with mismatched IPs or failed health checks
3. Document any missing nginx config paths
4. Update source-of-truth JSON after verification
EOF

log_info ""
log_info "Verification complete!"
log_success "Report: $REPORT_FILE"
log_success "All results: $OUTPUT_DIR/all_vms_verification.json"