#!/usr/bin/env bash
# Investigate Transaction Failures on All RPC Nodes
# Checks logs, transaction pool, recent transactions, and node status
#
# Optional environment / config overrides (each falls back to a built-in IP):
#   RPC_THIRDWEB_PRIMARY, RPC_THIRDWEB_1, RPC_THIRDWEB_2, RPC_PUBLIC_1,
#   RPC_ALI_1, RPC_ALI_2, IP_VAULT_PHOENIX_2, IP_SERVICE_202, IP_SERVICE_203

set -euo pipefail

# Locate the script and the project root (the script's parent directory).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Load IP configuration.
# Deliberately best-effort: a missing or failing config file is ignored and
# the hard-coded defaults in the node table below are used instead.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Colors (ANSI escape sequences, interpreted by `echo -e` in the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Log helpers: print a colored tag followed by the message ($1) on stdout.
# `${1:-}` keeps a call without a message from aborting under `set -u`.
log_info() { echo -e "${BLUE}[INFO]${NC} ${1:-}"; }
log_success() { echo -e "${GREEN}[✓]${NC} ${1:-}"; }
log_warn() { echo -e "${YELLOW}[⚠]${NC} ${1:-}"; }
log_error() { echo -e "${RED}[✗]${NC} ${1:-}"; }
# Prints a horizontal separator line; takes no arguments.
log_section() { echo -e "${CYAN}════════════════════════════════════════${NC}"; }

# RPC Nodes - All running nodes
# Key:   container VMID
# Value: "<ip>:<hostname>" (split on the first ':' by the consumer)
declare -A RPC_NODES
RPC_NODES[2400]="${RPC_THIRDWEB_PRIMARY:-192.168.11.240}:thirdweb-rpc-1"
RPC_NODES[2401]="${RPC_THIRDWEB_1:-192.168.11.241}:thirdweb-rpc-2"
RPC_NODES[2402]="${RPC_THIRDWEB_2:-192.168.11.242}:thirdweb-rpc-3"
RPC_NODES[2201]="${RPC_PUBLIC_1:-192.168.11.221}:besu-rpc-public-1"
RPC_NODES[2501]="${RPC_ALI_1:-192.168.11.251}:besu-rpc-2"
RPC_NODES[2502]="${RPC_ALI_2:-192.168.11.252}:besu-rpc-3"
RPC_NODES[2505]="${IP_VAULT_PHOENIX_2:-192.168.11.201}:besu-rpc-luis-0x8a"
RPC_NODES[2506]="${IP_SERVICE_202:-192.168.11.202}:besu-rpc-luis-0x1"
RPC_NODES[2507]="${IP_SERVICE_203:-192.168.11.203}:besu-rpc-putu-0x8a"
RPC_NODES[2508]="${IP_SERVICE_204:-192.168.11.204}:besu-rpc-putu-0x1"

# Proxmox host that is reached over SSH for container/service/log checks;
# can be overridden from the environment.
PROXMOX_HOST="${PROXMOX_HOST:-192.168.11.10}"

#######################################
# Function to execute RPC call: send one JSON-RPC 2.0 request over HTTP.
# Arguments:
#   $1 - node IP address
#   $2 - JSON-RPC method name
#   $3 - params as a JSON array literal (default: [])
#   $4 - HTTP port (default: 8545)
# Outputs:
#   The raw response body on stdout; "{}" is appended when curl fails.
# Returns:
#   Always 0, so callers can parse the output without guarding the call.
#######################################
rpc_call() {
  local ip="$1"
  local method="$2"
  local params="${3:-[]}"
  local port="${4:-8545}"
  local payload
  payload="{\"jsonrpc\":\"2.0\",\"method\":\"${method}\",\"params\":${params},\"id\":1}"

  # Timeouts keep one unreachable node from stalling the whole report.
  curl -s --connect-timeout 5 --max-time 15 -X POST "http://${ip}:${port}" \
    -H 'Content-Type: application/json' \
    -d "$payload" 2>/dev/null || echo "{}"
}

# Run a command string ($1) on the Proxmox host as root; stderr is discarded.
# NOTE(review): StrictHostKeyChecking=no skips host-key verification; kept
# as in the original invocation - confirm this is acceptable on this network.
_pve_ssh() {
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 root@"$PROXMOX_HOST" \
    "$1" 2>/dev/null
}

# Print the first string value of key $2 found in JSON text $1 (nothing if
# the key is absent or its value is not a string).
_json_string_field() {
  local json="${1:-}"
  local key="${2:-}"
  local re="\"${key}\"[[:space:]]*:[[:space:]]*\"([^\"]*)\""

  if [[ "$json" =~ $re ]]; then
    printf '%s\n' "${BASH_REMATCH[1]}"
  fi
}

# Convert a quantity ($1) to decimal: 0x-prefixed values are read as hex,
# plain digit strings as decimal; anything else prints 0.
_quantity_to_dec() {
  local value="${1:-}"

  if [[ "$value" =~ ^0[xX]([0-9a-fA-F]+)$ ]]; then
    printf '%d\n' "$((16#${BASH_REMATCH[1]}))"
  elif [[ "$value" =~ ^[0-9]+$ ]]; then
    printf '%d\n' "$((10#${value}))"
  else
    printf '0\n'
  fi
}

# Print every line of $1 prefixed with a single space.
_print_indented() {
  local line
  while IFS= read -r line; do
    printf ' %s\n' "$line"
  done <<<"$1"
}

#######################################
# Inspect the newest five blocks for transactions whose receipt status is 0x0.
# Arguments:
#   $1 - node IP address
#   $2 - current block number (decimal)
# Outputs:
#   One info line per non-empty block, one error line per failed
#   transaction, and a per-block warning when any failure was seen.
#######################################
_check_recent_blocks() {
  local ip="$1"
  local head_block="$2"
  local -r max_receipts=5 # receipts fetched per block, to bound RPC calls
  local re_txs='"transactions"[[:space:]]*:[[:space:]]*\[([^]]*)\]'
  local offset block_num block_hex block_json tx_list tx_hash receipt status
  local tx_count failed succeeded
  local -a tx_hashes

  for offset in 0 1 2 3 4; do
    block_num=$((head_block - offset))
    ((block_num > 0)) || continue
    printf -v block_hex '0x%x' "$block_num"

    # Ask for hashes only (second param false): the "transactions" array is
    # then a flat list of transaction hashes, so counting 0x values in it
    # gives the real transaction count and never includes the block hash.
    block_json=$(rpc_call "$ip" "eth_getBlockByNumber" "[\"${block_hex}\", false]")
    [[ "$block_json" =~ $re_txs ]] || continue
    tx_list="${BASH_REMATCH[1]}"

    tx_hashes=()
    if [[ -n "$tx_list" ]]; then
      mapfile -t tx_hashes < <(grep -o '0x[0-9a-fA-F]*' <<<"$tx_list")
    fi
    tx_count=${#tx_hashes[@]}
    ((tx_count > 0)) || continue

    log_info "Block ${block_hex}: ${tx_count} transaction(s)"

    # Check transaction receipts for failures
    failed=0
    succeeded=0
    for tx_hash in "${tx_hashes[@]:0:max_receipts}"; do
      receipt=$(rpc_call "$ip" "eth_getTransactionReceipt" "[\"${tx_hash}\"]")
      status=$(_json_string_field "$receipt" "status")
      if [[ "$status" == "0x0" ]]; then
        log_error "Transaction ${tx_hash:0:10}...: FAILED (status 0x0)"
        failed=$((failed + 1))
      elif [[ "$status" == "0x1" ]]; then
        succeeded=$((succeeded + 1))
      fi
    done

    if ((failed > 0)); then
      log_warn "Block ${block_hex}: ${failed} failed, ${succeeded} succeeded"
    fi
  done
  return 0
}

#######################################
# Function to check node: run the full diagnostic sequence for one node.
# Globals:
#   PROXMOX_HOST (read, via _pve_ssh)
# Arguments:
#   $1 - container VMID
#   $2 - "<ip>:<hostname>" entry from RPC_NODES
# Outputs:
#   Human-readable report on stdout.
# Returns:
#   1 when the container is not running or the RPC endpoint does not
#   answer eth_chainId; 0 otherwise.
#######################################
check_node() {
  local vmid="$1"
  local ip_hostname="$2"
  local ip="${ip_hostname%%:*}"
  local hostname="${ip_hostname#*:}"
  local remote_cmd

  log_section
  log_info "Checking VMID ${vmid} - ${hostname} (${ip})"
  log_section
  echo ""

  # 1. Check if container is running
  log_info "1. Container Status"
  local container_status
  remote_cmd="pvesh get /nodes/\$(hostname)/lxc/${vmid}/status/current --output-format json 2>/dev/null"
  remote_cmd+=" | grep -o '\"status\":\"[^\"]*\"' | head -1 | cut -d'\"' -f4"
  container_status=$(_pve_ssh "$remote_cmd") || container_status="unknown"
  container_status="${container_status:-unknown}"

  if [[ "$container_status" == "running" ]]; then
    log_success "Container is running"
  else
    log_error "Container status: ${container_status}"
    echo ""
    return 1
  fi
  echo ""

  # 2. Check Besu service status
  log_info "2. Besu Service Status"
  local service_status
  # `|| true` on the remote side: is-active already prints the state and
  # merely exits non-zero for anything but "active"; echoing a fallback
  # there would print the state twice.
  remote_cmd="pct exec ${vmid} -- systemctl is-active besu-rpc 2>/dev/null || true"
  service_status=$(_pve_ssh "$remote_cmd") || service_status="unknown"
  service_status="${service_status:-inactive}"

  if [[ "$service_status" == "active" ]]; then
    log_success "Besu service is active"
  else
    log_warn "Besu service status: ${service_status}"
  fi
  echo ""

  # 3. Check RPC connectivity
  log_info "3. RPC Connectivity"
  local chain_id
  chain_id=$(_json_string_field "$(rpc_call "$ip" "eth_chainId")" "result")

  if [[ -n "$chain_id" ]]; then
    log_success "RPC responding - Chain ID: ${chain_id}"
  else
    log_error "RPC not responding"
    echo ""
    return 1
  fi
  echo ""

  # 4. Check block number and sync status
  log_info "4. Block Synchronization"
  local block_hex
  local block_dec=0
  block_hex=$(_json_string_field "$(rpc_call "$ip" "eth_blockNumber")" "result")

  if [[ -n "$block_hex" ]]; then
    block_dec=$(_quantity_to_dec "$block_hex")
    log_success "Current block: ${block_dec} (${block_hex})"
  else
    log_warn "Could not get block number"
  fi

  # eth_syncing answers `false` when in sync and an object describing the
  # progress while syncing; anything else (error, empty reply) is unknown.
  local sync_json
  local re_synced='"result"[[:space:]]*:[[:space:]]*false'
  local re_syncing='"result"[[:space:]]*:[[:space:]]*([{]|true)'
  sync_json=$(rpc_call "$ip" "eth_syncing")

  if [[ "$sync_json" =~ $re_synced ]]; then
    log_success "Node is synchronized"
  elif [[ "$sync_json" =~ $re_syncing ]]; then
    log_warn "Node is still syncing"
  else
    log_warn "Could not determine sync status"
  fi
  echo ""

  # 5. Check recent logs for errors
  log_info "5. Recent Error Logs (last 10 minutes, up to 20 lines)"
  local recent_errors
  remote_cmd="pct exec ${vmid} -- journalctl -u besu-rpc --since '10 minutes ago' --no-pager 2>/dev/null"
  remote_cmd+=" | grep -iE 'error|exception|failed|revert|invalid' | tail -20"
  recent_errors=$(_pve_ssh "$remote_cmd") || recent_errors=""

  if [[ -z "$recent_errors" ]]; then
    log_success "No recent errors found in logs"
  else
    log_warn "Recent errors found:"
    _print_indented "$recent_errors"
  fi
  echo ""

  # 6. Check transaction pool status (if available)
  log_info "6. Transaction Pool Status"
  local txpool_json txpool_obj pending queued
  local re_txpool='"result"[[:space:]]*:[[:space:]]*[{][^}]*[}]'
  txpool_json=$(rpc_call "$ip" "txpool_status")

  if [[ "$txpool_json" =~ $re_txpool ]]; then
    txpool_obj="${BASH_REMATCH[0]}"
    pending=$(_json_string_field "$txpool_obj" "pending")
    queued=$(_json_string_field "$txpool_obj" "queued")
    log_info "Pending: ${pending:-0}, Queued: ${queued:-0}"
  else
    log_warn "Transaction pool status not available (may be RPC-only node)"
  fi
  echo ""

  # 7. Check recent blocks for transaction failures
  log_info "7. Recent Block Transactions"
  if [[ -n "$block_hex" ]] && ((block_dec > 0)); then
    _check_recent_blocks "$ip" "$block_dec"
  fi
  echo ""

  # 8. Check peer connections
  log_info "8. Peer Connections"
  local peer_hex peer_dec
  peer_hex=$(_json_string_field "$(rpc_call "$ip" "net_peerCount")" "result")

  if [[ -n "$peer_hex" ]]; then
    peer_dec=$(_quantity_to_dec "$peer_hex")
    if ((peer_dec > 0)); then
      log_success "Connected to ${peer_dec} peer(s)"
    else
      log_warn "No peers connected"
    fi
  else
    log_warn "Could not get peer count"
  fi
  echo ""

  # 9. Check for thread blocking warnings
  log_info "9. Thread Blocking Warnings"
  local thread_blocks
  remote_cmd="pct exec ${vmid} -- journalctl -u besu-rpc --since '1 hour ago' --no-pager 2>/dev/null"
  remote_cmd+=" | grep -i 'thread.*blocked' | tail -5"
  thread_blocks=$(_pve_ssh "$remote_cmd") || thread_blocks=""

  if [[ -z "$thread_blocks" ]]; then
    log_success "No thread blocking warnings"
  else
    log_warn "Thread blocking warnings found:"
    _print_indented "$thread_blocks"
  fi
  echo ""

  echo "----------------------------------------"
  echo ""
}

#######################################
# Main execution: check every node in RPC_NODES and print a summary.
# Globals:
#   RPC_NODES (read)
#######################################
main() {
  local vmid
  local -a vmids

  log_section
  log_info "RPC Transaction Failure Investigation"
  log_info "Date: $(date)"
  log_section
  echo ""

  # Check all RPC nodes. Associative-array key order is unspecified, so the
  # VMIDs are sorted to make the report order stable between runs.
  mapfile -t vmids < <(printf '%s\n' "${!RPC_NODES[@]}" | sort -n)
  for vmid in "${vmids[@]}"; do
    check_node "$vmid" "${RPC_NODES[$vmid]}" \
      || log_warn "Skipping VMID ${vmid} due to errors"
  done

  log_section
  log_info "Investigation Complete"
  log_section
  echo ""
  log_info "Summary:"
  log_info "- Checked all running RPC nodes for transaction failures"
  log_info "- Reviewed logs, transaction pool, and recent blocks"
  log_info "- Checked synchronization and peer connectivity"
  echo ""
}

main "$@"