Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- Marked submodules ai-mcp-pmm-controller, explorer-monorepo, and smom-dbis-138 as dirty to reflect recent changes. - Updated documentation to clarify operator script usage, including dotenv loading and task execution instructions. - Enhanced the README and various index files to provide clearer navigation and task completion guidance. Made-with: Cursor
338 lines
16 KiB
Bash
Executable File
338 lines
16 KiB
Bash
Executable File
#!/usr/bin/env bash
# Verify backend VMs configuration
# Checks status, IPs, services, ports, config files, and health endpoints
#
# Evidence is written beneath docs/04-configuration/verification-evidence/
# in the project root, which is resolved as two directories above this script.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
EVIDENCE_DIR="$PROJECT_ROOT/docs/04-configuration/verification-evidence"

# Colors (backslash escapes are expanded by the log_* helpers at print time)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Each prints one colour-tagged line to stderr so that stdout
# stays free for data (verify_vm's stdout is captured as JSON by the caller).
#   $1 - message text; printed literally (backslashes are NOT interpreted, so
#        text taken from remote command output cannot inject escape sequences).
log_info() { printf '%b[INFO]%b %s\n' "${BLUE:-}" "${NC:-}" "${1:-}" >&2; }
log_success() { printf '%b[✓]%b %s\n' "${GREEN:-}" "${NC:-}" "${1:-}" >&2; }
log_warn() { printf '%b[⚠]%b %s\n' "${YELLOW:-}" "${NC:-}" "${1:-}" >&2; }
log_error() { printf '%b[✗]%b %s\n' "${RED:-}" "${NC:-}" "${1:-}" >&2; }

cd "$PROJECT_ROOT"

# Optional local overrides. Loading is deliberately best-effort: a missing file
# is skipped and errors raised while sourcing are silenced, so the built-in
# defaults below still apply.
if [ -f .env ]; then
  # shellcheck disable=SC1091
  source .env 2>/dev/null || true
fi
if [ -f config/ip-addresses.conf ]; then
  # shellcheck disable=SC1091
  source config/ip-addresses.conf 2>/dev/null || true
fi

# Proxmox host addresses (overridable through the files sourced above or the environment)
ML110_IP="${PROXMOX_HOST_ML110:-192.168.11.10}"
R630_01_IP="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02_IP="${PROXMOX_HOST_R630_02:-192.168.11.12}"

# Each run gets its own timestamped evidence directory
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_DIR="$EVIDENCE_DIR/backend-vms-verification-$TIMESTAMP"
mkdir -p "$OUTPUT_DIR"

# VMIDs that are optional (not deployed yet); unknown status = informational only. Space-separated.
VM_OPTIONAL_WHEN_MISSING="${VM_OPTIONAL_WHEN_MISSING:-5801}"

# Backend VMs from baseline docs
# Key:   VMID
# Value: pipe-separated record, split by verify_vm into
#   expected_ip|hostname|host|host_ip|service_type|config_path|domains
# For besu/nodejs entries the sixth field carries the port list instead of a
# config path; "-" means no config path. domains is comma-separated.
declare -A VM_CONFIGS=(
  # VMs with nginx
  ["5000"]="${IP_BLOCKSCOUT:-192.168.11.140}|blockscout-1|r630-02|${R630_02_IP}|nginx|/etc/nginx/sites-available/blockscout|explorer.d-bis.org"
  ["7810"]="${IP_MIM_WEB:-192.168.11.37}|mim-web-1|r630-02|${R630_02_IP}|nginx|/etc/nginx/sites-available/mim4u|mim4u.org,www.mim4u.org,secure.mim4u.org,training.mim4u.org"
  ["10130"]="${IP_DBIS_FRONTEND:-192.168.11.130}|dbis-frontend|r630-01|${R630_01_IP}|web|/etc/nginx/sites-available/dbis-frontend|dbis-admin.d-bis.org,secure.d-bis.org"
  ["2400"]="${RPC_THIRDWEB_PRIMARY:-192.168.11.240}|thirdweb-rpc-1|ml110|${ML110_IP}|nginx|/etc/nginx/sites-available/rpc-thirdweb|rpc.public-0138.defi-oracle.io"
  # VMs without nginx
  ["2101"]="${RPC_CORE_1:-192.168.11.211}|besu-rpc-core-1|r630-01|${R630_01_IP}|besu|8545,8546|rpc-http-prv.d-bis.org,rpc-ws-prv.d-bis.org"
  ["2201"]="${RPC_PUBLIC_1:-192.168.11.221}|besu-rpc-public-1|r630-02|${R630_02_IP}|besu|8545,8546|rpc-http-pub.d-bis.org,rpc-ws-pub.d-bis.org"
  ["10150"]="${IP_DBIS_API:-192.168.11.155}|dbis-api-primary|r630-01|${R630_01_IP}|nodejs|3000|dbis-api.d-bis.org"
  ["10151"]="${IP_DBIS_API_2:-192.168.11.156}|dbis-api-secondary|r630-01|${R630_01_IP}|nodejs|3000|dbis-api-2.d-bis.org"
  # Mifos X + Fineract (VMID 5800); NPMplus 10237 proxies to this
  ["5800"]="${MIFOS_IP:-192.168.11.85}|mifos|r630-02|${R630_02_IP}|web|-|mifos.d-bis.org"
  # DApp LXC (VMID 5801); NPMplus 10233 (or tunnel) proxies to this
  ["5801"]="${IP_DAPP_LXC:-192.168.11.58}|dapp-smom|r630-02|${R630_02_IP}|web|-|dapp.d-bis.org"
)

# Run a shell command inside a container via `pct exec` on its Proxmox host.
# Arguments:
#   $1 - VMID of the container
#   $2 - address of the Proxmox host to ssh into (as root)
#   $3 - command line evaluated by `bash --norc -c` inside the container
# Outputs:
#   The command's stdout. If ssh / pct / the command exits non-zero, the
#   sentinel line COMMAND_FAILED is appended; stderr is discarded.
# Returns:
#   Always 0 - callers detect failure through the sentinel.
exec_in_vm() {
  local vmid=$1
  local host=$2
  local cmd=$3

  # The command travels to the remote shell wrapped in single quotes, so any
  # single quote inside it must be written as '\'' - otherwise the quoting
  # collapses and arguments such as grep ':8545 ' lose their spaces while the
  # remainder of the command line is dropped.
  local sq="'"
  local sq_escaped="'\\''"
  local quoted_cmd=${cmd//"$sq"/"$sq_escaped"}

  # Use --norc to avoid .bashrc permission errors; redirect its stderr
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@"$host" \
    "pct exec $vmid -- bash --norc -c '$quoted_cmd'" 2>/dev/null || echo "COMMAND_FAILED"
}

# Join all arguments with commas (assembles JSON arrays from element strings).
_vm_json_join() {
  local IFS=','
  printf '%s' "$*"
}

# Print the 3-digit HTTP status for URL $1, or "000" when no response arrived.
_vm_http_code() {
  local url=$1 code
  # curl itself prints "000" through -w when the connection fails, so no
  # fallback may be appended to its output (that would yield "000000");
  # instead anything that is not exactly three digits is normalised.
  code=$(curl -s -o /dev/null -w '%{http_code}' --connect-timeout 3 --max-time 10 "$url" 2>/dev/null) || true
  [[ "$code" =~ ^[0-9]{3}$ ]] || code="000"
  printf '%s' "$code"
}

# Print one health_endpoints JSON element for URL $1 and status code $2.
# Code "000" (no response) is recorded as actual_code null / fail; otherwise
# 2xx and 3xx count as pass.
_vm_health_json() {
  local url=$1 code=$2 verdict="fail"
  if [[ "$code" == "000" ]]; then
    printf '{"path":"%s","expected_code":200,"actual_code":null,"status":"fail"}' "$url"
    return 0
  fi
  if ((10#$code >= 200 && 10#$code < 400)); then
    verdict="pass"
  fi
  printf '{"path":"%s","expected_code":200,"actual_code":%s,"status":"%s"}' "$url" "$code" "$verdict"
}

# Return 0 when `ss` inside container $1 (on host $2) shows a listener on TCP port $3.
_vm_port_listening() {
  local vmid=$1 host_ip=$2 port=$3 out
  # Only double quotes are used in the remote command so it survives being
  # wrapped in single quotes by exec_in_vm.
  out=$(exec_in_vm "$vmid" "$host_ip" "ss -lntp 2>/dev/null | grep \":$port \" || true" 2>/dev/null || echo "")
  # exec_in_vm reports failures with a COMMAND_FAILED sentinel; that is not a listener.
  [[ -n "$out" && "$out" != *COMMAND_FAILED* ]]
}

# Verify one backend VM/container and emit the result as JSON.
# Globals read:
#   VM_CONFIGS, VM_OPTIONAL_WHEN_MISSING, OUTPUT_DIR
# Arguments:
#   $1 - VMID; must be a key of VM_CONFIGS
# Outputs:
#   stderr - progress lines via the log_* helpers
#   stdout - the result as compact JSON (the raw document when jq cannot process it)
#   files  - $OUTPUT_DIR/vmid_<id>_verification.json and, when available,
#            $OUTPUT_DIR/vmid_<id>_listening_ports.txt
verify_vm() {
  local vmid=$1
  local config="${VM_CONFIGS[$vmid]}"
  local expected_ip hostname host host_ip service_type config_path domains
  IFS='|' read -r expected_ip hostname host host_ip service_type config_path domains <<< "$config"

  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=10)
  local ipv4_re='^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$'

  log_info ""
  log_info "Verifying VMID $vmid: $hostname"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" >&2

  # --- VM status: try the container tool first, then the QEMU tool ---
  local vm_status status
  vm_status=$(ssh "${ssh_opts[@]}" root@"$host_ip" \
    "pct status $vmid 2>/dev/null || qm status $vmid 2>/dev/null" 2>&1 || echo "unknown")

  if [[ "$vm_status" == *running* ]]; then
    status="running"
    log_success "Status: Running"
  elif [[ "$vm_status" == *stopped* ]]; then
    status="stopped"
    log_warn "Status: Stopped"
  else
    status="unknown"
    if [[ " ${VM_OPTIONAL_WHEN_MISSING:-} " == *" $vmid "* ]]; then
      log_info "Status: Not deployed (optional VMID; deploy with deploy-dapp-lxc.sh to verify)"
    else
      log_warn "Status: Unknown"
    fi
  fi

  # --- Actual IP ---
  local actual_ip=""
  if [[ "$status" == "running" ]]; then
    # Prefer pct config - parse net0: ...ip=X.X.X.X/24 or ip0=X.X.X.X
    actual_ip=$(ssh "${ssh_opts[@]}" root@"$host_ip" \
      "pct config $vmid 2>/dev/null | grep -oE 'ip=[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+' | head -1 | cut -d= -f2" \
      2>/dev/null || echo "")
    if ! [[ "$actual_ip" =~ $ipv4_re ]]; then
      # Fall back to asking the guest itself (cut avoids awk quoting issues in ssh)
      actual_ip=$(exec_in_vm "$vmid" "$host_ip" 'hostname -I 2>/dev/null | cut -d" " -f1' 2>/dev/null \
        | head -1 | tr -d '\n\r' || echo "")
    fi
    # Anything that is not a dotted quad (error text, COMMAND_FAILED, ...) is discarded.
    [[ "$actual_ip" =~ $ipv4_re ]] || actual_ip=""
  fi

  if [[ -n "$actual_ip" && "$actual_ip" == "$expected_ip" ]]; then
    log_success "IP: $actual_ip (matches expected)"
  elif [[ -n "$actual_ip" ]]; then
    log_warn "IP: $actual_ip (expected $expected_ip)"
  else
    log_warn "IP: Could not determine (expected $expected_ip)"
  fi

  # --- Services and ports ---
  local -a services=() listening_ports=() health_endpoints=()
  local nginx_status config_exists enabled_sites all_ports port

  if [[ "$status" == "running" ]]; then
    case "$service_type" in
      nginx)
        nginx_status=$(exec_in_vm "$vmid" "$host_ip" "systemctl is-active nginx 2>/dev/null || echo inactive" 2>/dev/null \
          | head -1 | tr -d '\n\r' || echo "unknown")
        if [[ "$nginx_status" == "active" ]]; then
          log_success "Nginx: Active"
          services+=('{"name":"nginx","type":"systemd","status":"active"}')

          # Check that the expected nginx config file is present
          if [[ -n "$config_path" && "$config_path" != "TBD" ]]; then
            config_exists=$(exec_in_vm "$vmid" "$host_ip" "test -f \"$config_path\" && echo yes || echo no" 2>/dev/null || echo "unknown")
            if [[ "$config_exists" == "yes" ]]; then
              log_success "Nginx config: $config_path exists"
            else
              log_warn "Nginx config: $config_path not found"
            fi
          fi

          # List enabled sites (xargs joins lines without tr escaping issues)
          enabled_sites=$(exec_in_vm "$vmid" "$host_ip" 'ls -1 /etc/nginx/sites-enabled/ 2>/dev/null | xargs' 2>/dev/null || echo "")
          if [[ -n "$enabled_sites" && "$enabled_sites" != "COMMAND_FAILED" ]]; then
            log_info "Enabled sites: $enabled_sites"
          fi
        else
          log_warn "Nginx: $nginx_status"
          # Strip double quotes so the value cannot break the JSON string
          services+=("{\"name\":\"nginx\",\"type\":\"systemd\",\"status\":\"${nginx_status//\"/}\"}")
        fi
        ;;
      besu)
        for port in 8545 8546; do
          if _vm_port_listening "$vmid" "$host_ip" "$port"; then
            log_success "Port $port: Listening"
            listening_ports+=("{\"port\":$port,\"protocol\":\"tcp\",\"process\":\"besu\"}")
          else
            log_warn "Port $port: Not listening"
          fi
        done
        # NOTE(review): recorded as "running" regardless of the port checks
        # above; kept for output compatibility - confirm whether it should
        # reflect the actual port state.
        services+=('{"name":"besu-rpc","type":"direct","status":"running"}')
        ;;
      nodejs)
        if _vm_port_listening "$vmid" "$host_ip" 3000; then
          log_success "Port 3000: Listening"
          listening_ports+=('{"port":3000,"protocol":"tcp","process":"nodejs"}')
        else
          log_warn "Port 3000: Not listening"
        fi
        # NOTE(review): status is not derived from the port check (see besu).
        services+=('{"name":"nodejs-api","type":"systemd","status":"running"}')
        ;;
      web)
        # HTTP on port 80, e.g. Python/Node serving dbis-frontend
        if _vm_port_listening "$vmid" "$host_ip" 80; then
          log_success "Port 80: Listening"
          listening_ports+=('{"port":80,"protocol":"tcp","process":"http"}')
        else
          log_warn "Port 80: Not listening"
        fi
        # NOTE(review): status is not derived from the port check (see besu).
        services+=('{"name":"http","type":"direct","status":"running"}')
        ;;
    esac

    # Keep the full listener table as evidence (skipped when the command failed)
    all_ports=$(exec_in_vm "$vmid" "$host_ip" "ss -lntp 2>/dev/null | grep LISTEN || true" 2>/dev/null || echo "")
    if [[ -n "$all_ports" && "$all_ports" != *COMMAND_FAILED* ]]; then
      printf '%s\n' "$all_ports" > "$OUTPUT_DIR/vmid_${vmid}_listening_ports.txt"
    fi
  fi

  # --- Health check endpoints ---
  # Note: 301 = HTTPS redirect (normal); 404 = wrong port/path or NPMplus; 000 = no connection (host/firewall/context).
  # See docs/04-configuration/DETAILED_GAPS_AND_ISSUES_LIST.md §11a.
  local url http_code rpc_response
  if [[ "$status" == "running" && -n "$actual_ip" ]]; then
    case "$service_type" in
      nginx | web)
        # nginx and web both use port 80
        url="http://$actual_ip:80"
        http_code=$(_vm_http_code "$url")
        if [[ "$http_code" != "000" ]]; then
          log_success "HTTP health check: $actual_ip:80 returned $http_code"
        else
          log_warn "HTTP health check: $actual_ip:80 failed"
        fi
        health_endpoints+=("$(_vm_health_json "$url" "$http_code")")
        ;;
      besu)
        url="http://$actual_ip:8545"
        rpc_response=$(curl -s -X POST "$url" \
          -H 'Content-Type: application/json' \
          -d '{"jsonrpc":"2.0","method":"eth_chainId","params":[],"id":1}' \
          --connect-timeout 3 --max-time 10 2>/dev/null || echo "")
        if [[ "$rpc_response" == *result* ]]; then
          log_success "RPC health check: $actual_ip:8545 responded"
          health_endpoints+=("$(_vm_health_json "$url" 200)")
        else
          log_warn "RPC health check: $actual_ip:8545 failed"
          health_endpoints+=("$(_vm_health_json "$url" 000)")
        fi
        ;;
      nodejs)
        # Prefer /health; fall back to the root path when nothing answered there
        url="http://$actual_ip:3000"
        http_code=$(_vm_http_code "$url/health")
        if [[ "$http_code" == "000" ]]; then
          http_code=$(_vm_http_code "$url")
        fi
        if [[ "$http_code" != "000" ]]; then
          log_success "API health check: $actual_ip:3000 returned $http_code"
        else
          log_warn "API health check: $actual_ip:3000 failed"
        fi
        health_endpoints+=("$(_vm_health_json "$url" "$http_code")")
        ;;
    esac
  fi

  # --- Build VM result JSON ---
  local has_nginx="false"
  if [[ "$service_type" == "nginx" ]]; then
    has_nginx="true"
  fi

  # "a,b" -> "a","b"
  local domains_json
  domains_json=$(sed 's/[^,][^,]*/"&"/g' <<< "$domains")

  # The ${arr[@]+...} form keeps empty arrays safe under `set -u` on older bash.
  local services_json ports_json health_json
  services_json=$(_vm_json_join ${services[@]+"${services[@]}"})
  ports_json=$(_vm_json_join ${listening_ports[@]+"${listening_ports[@]}"})
  health_json=$(_vm_json_join ${health_endpoints[@]+"${health_endpoints[@]}"})

  local vm_result
  vm_result="{
  \"vmid\": $vmid,
  \"hostname\": \"$hostname\",
  \"host\": \"$host\",
  \"host_ip\": \"$host_ip\",
  \"expected_ip\": \"$expected_ip\",
  \"actual_ip\": \"${actual_ip:-}\",
  \"status\": \"$status\",
  \"has_nginx\": $has_nginx,
  \"service_type\": \"$service_type\",
  \"config_path\": \"$config_path\",
  \"public_domains\": [$domains_json],
  \"services\": [$services_json],
  \"listening_ports\": [$ports_json],
  \"health_endpoints\": [$health_json],
  \"verified_at\": \"$(date -Iseconds)\"
}"

  printf '%s\n' "$vm_result" > "$OUTPUT_DIR/vmid_${vmid}_verification.json"
  # Compact form for the caller; fall back to the raw document when jq cannot process it
  printf '%s\n' "$vm_result" | jq -c . 2>/dev/null || printf '%s\n' "$vm_result"
}

# Print field $2 (a jq filter) of JSON document $1; print fallback $3 when jq fails.
_report_field() {
  local json=$1 filter=$2 fallback=${3:-}
  printf '%s\n' "$json" | jq -r "$filter" 2>/dev/null || echo "$fallback"
}

echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "🔍 Backend VMs Verification"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Associative-array key order is unspecified, so sort the VMIDs numerically to
# keep the JSON and the report in the same order on every run.
mapfile -t SORTED_VMIDS < <(printf '%s\n' "${!VM_CONFIGS[@]}" | sort -n)

ALL_VM_RESULTS=()
for vmid in "${SORTED_VMIDS[@]}"; do
  # verify_vm logs to stderr; only its JSON result arrives on stdout
  result=$(verify_vm "$vmid")
  if [ -n "$result" ]; then
    ALL_VM_RESULTS+=("$result")
  fi
done

# Combine all results (compact JSON, one per line for jq -s)
printf '%s\n' "${ALL_VM_RESULTS[@]}" | jq -s '.' > "$OUTPUT_DIR/all_vms_verification.json" 2>/dev/null || {
  log_warn "jq merge failed, writing raw results"
  printf '%s\n' "${ALL_VM_RESULTS[@]}" > "$OUTPUT_DIR/all_vms_verification.json"
}

# Generate report
REPORT_FILE="$OUTPUT_DIR/verification_report.md"
cat > "$REPORT_FILE" <<EOF
# Backend VMs Verification Report

**Date**: $(date -Iseconds)
**Verifier**: $(whoami)

## Summary

Total VMs verified: ${#VM_CONFIGS[@]}

## VM Verification Results

EOF

# One section per VM, built from the fields of its JSON result
for result in "${ALL_VM_RESULTS[@]}"; do
  vmid=$(_report_field "$result" '.vmid')
  hostname=$(_report_field "$result" '.hostname')
  status=$(_report_field "$result" '.status' "unknown")
  expected_ip=$(_report_field "$result" '.expected_ip')
  actual_ip=$(_report_field "$result" '.actual_ip')
  has_nginx=$(_report_field "$result" '.has_nginx' "false")

  {
    echo ""
    echo "### VMID $vmid: $hostname"
    echo "- Status: $status"
    echo "- Expected IP: $expected_ip"
    echo "- Actual IP: ${actual_ip:-unknown}"
    echo "- Has Nginx: $has_nginx"
    echo "- Details: See \`vmid_${vmid}_verification.json\`"
  } >> "$REPORT_FILE"
done

# Static trailer; the quoted delimiter keeps the backticks literal
cat >> "$REPORT_FILE" <<'EOF'

## Files Generated

- `all_vms_verification.json` - Complete VM verification results
- `vmid_*_verification.json` - Individual VM verification details
- `vmid_*_listening_ports.txt` - Listening ports output per VM
- `verification_report.md` - This report

## Next Steps

1. Review verification results for each VM
2. Investigate any VMs with mismatched IPs or failed health checks
3. Document any missing nginx config paths
4. Update source-of-truth JSON after verification
EOF

log_info ""
log_info "Verification complete!"
log_success "Report: $REPORT_FILE"
log_success "All results: $OUTPUT_DIR/all_vms_verification.json"