#!/usr/bin/env bash
# Clear transaction pools on all Besu nodes (validators, Core/Thirdweb/public RPC).
#
# Stuck txs often reappear on Core when sentries or Alltra/Hybx RPC CTs (still on
# the P2P mesh) were not cleared.
# To include those: CLEAR_BESU_PEER_TXPOOLS=1 bash scripts/clear-all-transaction-pools.sh
# Peer tier (1500–1502, 2420/2430/2440/2460/2470/2480) uses pct stop + pct mount on
# the PVE host (avoids hung systemctl inside flaky CTs).
#
# SSH: uses PROXMOX_SSH_USER from config/ip-addresses.conf (root). If .env sets
# PROXMOX_USER=root@pam for the API, that value is NOT used for SSH
# (see PROXMOX_USER= assignment below).
# Afterward: Core RPC (2101) can take several minutes to bind JSON-RPC while RocksDB
# opens/compacts; public RPC may recover first.

set -euo pipefail

# Load IP configuration. The config file is optional: when it is missing the
# built-in host defaults further down are used, hence the "|| true".
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Peer-tier Besu CTs (sentries, Alltra/Hybx RPC) retain mempool and can re-gossip
# txs to Core after a clear.
# Default: skip them (faster). To flush those pools too:
#   CLEAR_BESU_PEER_TXPOOLS=1 bash scripts/clear-all-transaction-pools.sh
CLEAR_BESU_PEER_TXPOOLS="${CLEAR_BESU_PEER_TXPOOLS:-0}"

# Shell SSH must be root@host — not root@pam@host (.env often sets PROXMOX_USER=root@pam for API).
# SSH login user for the PVE hosts. A value containing "@" (for example the API
# realm form root@pam) is not usable as user@host for ssh, so fall back to root.
PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-root}"
if [[ "$PROXMOX_SSH_USER" == *"@"* ]]; then
  PROXMOX_SSH_USER="root"
fi
PROXMOX_USER="$PROXMOX_SSH_USER"

# PVE host addresses; each falls back through the alternative variable names
# to a hard-coded default.
PROXMOX_ML110="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"
PROXMOX_R630="${PROXMOX_R630:-${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}}"
R630_03="${PROXMOX_R630_03:-${PROXMOX_HOST_R630_03:-192.168.11.13}}"
R630_02="${PROXMOX_R630_02:-${PROXMOX_HOST_R630_02:-192.168.11.12}}"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Colour codes are expanded with %b; the message itself is
# printed with %s so backslashes in a message are never interpreted.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "$1"; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "$1"; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "$1"; }
log_section() {
  local rule='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
  printf '\n%b%s%b\n' "$CYAN" "$rule" "$NC"
  printf '%b%s%b\n' "$CYAN" "$1" "$NC"
  printf '%b%s%b\n\n' "$CYAN" "$rule" "$NC"
}

echo "=== Clear Transaction Pools on All Nodes ==="
echo ""

#######################################
# Clear the transaction pool of one Besu node from inside its container:
# stop the Besu unit(s), delete pool files, start a unit again, report status.
# Globals:   PROXMOX_USER (read)
# Arguments: $1 - container VMID
#            $2 - PVE host the container lives on
#            $3 - human-readable node type, used in log lines only
# Outputs:   progress and result lines on stdout
# Returns:   0 always; problems are reported as warnings (best effort)
#######################################
clear_node_pool() {
  local VMID=$1
  local HOST=$2
  local NODE_TYPE=$3
  local SSH_TARGET="${PROXMOX_USER}@${HOST}"
  local -a ssh_opts=(-o ConnectTimeout=10 -o StrictHostKeyChecking=no)
  local clear_output unit_state

  log_info "Clearing transaction pool for $NODE_TYPE (VMID $VMID on $HOST)..."

  # Stop the service (timeout each stop — Besu can hang on SIGTERM during heavy I/O).
  # All four unit names are tried because the caller does not say which one
  # this container runs.
  log_info " Stopping service..."
  ssh "${ssh_opts[@]}" "$SSH_TARGET" \
    "pct exec $VMID -- bash -c '
      stop_besu() {
        local n=\"\$1\"
        timeout 90 systemctl stop \"\${n}.service\" 2>/dev/null ||
          timeout 90 systemctl stop \"\$n\" 2>/dev/null || true
      }
      stop_besu besu-validator
      stop_besu besu-rpc-core
      stop_besu besu-rpc
      stop_besu besu-sentry
    '" 2>&1 | grep -v "Configuration file" || true

  sleep 2

  # Find and clear transaction pool database. The remote side prints one
  # "Cleared: <dir>" line per data directory it actually processed; that line
  # is the only evidence of success, so an SSH failure or a container without
  # any Besu data directory is reported as a warning instead of a success.
  log_info " Clearing transaction pool database..."
  clear_output=$(
    ssh "${ssh_opts[@]}" "$SSH_TARGET" \
      "pct exec $VMID -- bash -c '
        for DATA_DIR in /data/besu /var/lib/besu; do
          if [ -d \"\$DATA_DIR\" ]; then
            # -prune keeps find from descending into a directory it just removed
            find \"\$DATA_DIR\" -type d -name \"*pool*\" -prune -exec rm -rf -- {} + 2>/dev/null || true
            find \"\$DATA_DIR\" -type f -name \"*transaction*\" -delete 2>/dev/null || true
            find \"\$DATA_DIR\" -type f -name \"*txpool*\" -delete 2>/dev/null || true
            echo \"Cleared: \$DATA_DIR\"
          fi
        done
      '" 2>&1 | grep -v "Configuration file" || true
  )

  if [[ "$clear_output" == *"Cleared: "* ]]; then
    log_success " Transaction pool cleared"
  else
    log_warn " Could not clear transaction pool (may not exist)"
  fi

  # Restart the service (first successful start wins; timeout avoids indefinite hang)
  log_info " Restarting service..."
  ssh "${ssh_opts[@]}" "$SSH_TARGET" \
    "pct exec $VMID -- bash -c '
      for u in besu-validator besu-rpc-core besu-rpc besu-sentry; do
        timeout 120 systemctl start \"\${u}.service\" 2>/dev/null && break
        timeout 120 systemctl start \"\$u\" 2>/dev/null && break
      done
      exit 0
    '" 2>&1 | grep -v "Configuration file" || true

  sleep 3

  # Verify at least one Besu unit is active. The remote loop prints a single
  # word so the result is one line rather than one status per unit name.
  unit_state=$(
    ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_TARGET" \
      "pct exec $VMID -- bash -c '
        for u in besu-validator besu-rpc-core besu-rpc besu-sentry; do
          for s in \"\$u\" \"\${u}.service\"; do
            st=\$(systemctl is-active \"\$s\" 2>/dev/null || true)
            [ \"\$st\" = active ] && { echo active; exit 0; }
          done
        done
        echo inactive
      '" 2>&1 | grep -v "Configuration file" | tr -d '\r' | tail -1
  ) || unit_state="unknown"
  [[ -n "$unit_state" ]] || unit_state="unknown"

  if [[ "$unit_state" == "active" ]]; then
    log_success " Service restarted and active"
  else
    log_warn " Service status: $unit_state"
  fi

  echo ""
}

#######################################
# Clear txpool files while the CT is stopped, by mounting its rootfs on the
# PVE host (reliable for sentries / Alltra RPC).
# The remote script is sent on stdin through a quoted here-doc, so nothing in
# it is expanded locally; the VMID travels as the script's first argument.
# Globals:   PROXMOX_USER (read)
# Arguments: $1 - container VMID
#            $2 - PVE host the container lives on
#            $3 - label used in log lines only
# Outputs:   progress lines on stdout (remote lines are prefixed "[pve]")
# Returns:   0 always; a failed remote run is reported as a warning
#######################################
clear_peer_txpool_via_pct_mount() {
  local VMID=$1
  local HOST=$2
  local LABEL=$3
  local SSH_TARGET="${PROXMOX_USER}@${HOST}"

  log_info "Clearing $LABEL (VMID $VMID on $HOST) via pct stop + rootfs wipe..."
  if ssh -o ConnectTimeout=15 -o StrictHostKeyChecking=no "$SSH_TARGET" bash -s -- "$VMID" <<'EOF'
set -u
VMID=${1:?missing VMID}

# NOTE(review): the stop command and its timeout were reconstructed; the value
# 180 is an assumption — confirm against the intended stop budget.
timeout 180 pct stop "$VMID" 2>/dev/null || true
for ((i = 0; i < 90; i++)); do
  if pct status "$VMID" 2>/dev/null | grep -qi stopped; then
    echo " [pve] CT $VMID stopped"
    break
  fi
  sleep 2
done
if ! pct status "$VMID" 2>/dev/null | grep -qi stopped; then
  echo " [pve] WARN: CT $VMID not stopped after wait — cannot wipe pool safely"
  timeout 120 pct start "$VMID" 2>/dev/null || true
  exit 1
fi
sleep 2

# After pct stop, /var/lib/lxc/<vmid>/rootfs often exists but is empty until pct mount binds the CT disk (LVM/ZFS).
# Using the path without mount caused silent no-ops on r630-03 mesh CTs (no "cleared under" lines).
MP="/var/lib/lxc/${VMID}/rootfs"
pct unmount "$VMID" 2>/dev/null || true
if ! pct mount "$VMID" 2>/dev/null; then
  echo " [pve] WARN: pct mount $VMID failed — cannot wipe pool safely"
  timeout 120 pct start "$VMID" 2>/dev/null || true
  exit 1
fi
echo " [pve] pct mount bound $MP"
if [ ! -d "$MP" ]; then
  echo " [pve] WARN: no rootfs at $MP after mount"
  pct unmount "$VMID" 2>/dev/null || true
  timeout 120 pct start "$VMID" 2>/dev/null || true
  exit 1
fi

for dd in "$MP/data/besu" "$MP/var/lib/besu" "$MP/opt/besu"; do
  if [ -d "$dd" ]; then
    # -prune keeps find from descending into a directory it just removed
    find "$dd" -type d -name "*pool*" -prune -exec rm -rf -- {} + 2>/dev/null || true
    find "$dd" -type f -name "*transaction*" -delete 2>/dev/null || true
    find "$dd" -type f -name "*txpool*" -delete 2>/dev/null || true
    echo " [pve] cleared under $dd"
  fi
done

pct unmount "$VMID" 2>/dev/null || true
sleep 2
echo " [pve] pct start $VMID"
# A container left stopped must not be reported as a successful wipe.
if ! timeout 180 pct start "$VMID" 2>/dev/null; then
  echo " [pve] WARN: pct start $VMID failed — CT may still be stopped"
  exit 1
fi
sleep 2
exit 0
EOF
  then
    log_success " Peer CT $VMID — pool wipe via mount complete"
  else
    log_warn " Peer CT $VMID — mount path had issues; confirm: ssh ${SSH_TARGET} 'pct status $VMID'"
  fi
  echo ""
}

# peer_ct_listed HOST VMID — succeeds when `pct list` on HOST has a row whose
# first column is exactly VMID (anchored so 150 does not match 1500).
peer_ct_listed() {
  local host=$1
  local vmid=$2
  ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${host}" \
    "pct list | grep -q '^${vmid} '" 2>/dev/null
}

# Peer tier first (when enabled) so validators/RPC are not refilled from sentries mid-run.
if [[ "${CLEAR_BESU_PEER_TXPOOLS:-0}" == "1" ]]; then
  log_section "Clearing Sentry transaction pools (1500–1502) — pct mount on host"
  for vmid in 1500 1501 1502; do
    if peer_ct_listed "$PROXMOX_R630" "$vmid"; then
      clear_peer_txpool_via_pct_mount "$vmid" "$PROXMOX_R630" "Sentry"
    else
      log_warn "Sentry VMID $vmid not found on ${PROXMOX_R630}"
    fi
  done

  log_section "Clearing current edge RPC pools (2420/2430/2440/2460/2470/2480) — pct mount on host"
  for vmid in 2420 2430 2440 2460 2470 2480; do
    if peer_ct_listed "$PROXMOX_R630" "$vmid"; then
      clear_peer_txpool_via_pct_mount "$vmid" "$PROXMOX_R630" "Besu RPC (Alltra/Hybx)"
    else
      log_warn "VMID $vmid not found on ${PROXMOX_R630}"
    fi
  done

  # r630-03: validators 1003–1004, sentries 1503–1508, Core2 2102, Fireblocks 2301/2304, ThirdWeb stack 2400/2402/2403
  # (the validators are not in this loop; only the VMIDs listed in the title are)
  log_section "Clearing Besu mesh on r630-03 (1503–1508, 2102, 2301, 2304, 2400, 2402, 2403)"
  for vmid in 1503 1504 1505 1506 1507 1508 2102 2301 2304 2400 2402 2403; do
    if peer_ct_listed "$R630_03" "$vmid"; then
      clear_peer_txpool_via_pct_mount "$vmid" "$R630_03" "Besu mesh r630-03 VMID $vmid"
    else
      log_warn "VMID $vmid not found on ${R630_03}"
    fi
  done

  # r630-02: public 2201 + named RPC / ThirdWeb helper CTs (same P2P mesh)
  log_section "Clearing Besu mesh on r630-02 (2201, 2303, 2305–2308, 2401)"
  for vmid in 2201 2303 2305 2306 2307 2308 2401; do
    if peer_ct_listed "$R630_02" "$vmid"; then
      clear_peer_txpool_via_pct_mount "$vmid" "$R630_02" "Besu mesh r630-02 VMID $vmid"
    else
      log_warn "VMID $vmid not found on ${R630_02}"
    fi
  done
else
  log_info "Skipping sentry + Alltra/Hybx pool clear (set CLEAR_BESU_PEER_TXPOOLS=1 if stuck txs reappear on Core after a clear)."
fi

# Clear validators
log_section "Clearing Validator Transaction Pools"
# Validators: 1000–1002 on r630-01; 1003–1004 on r630-03 (see ALL_VMIDS_ENDPOINTS.md).
# Each entry is VMID:HOST:LABEL and is split on ':' in the loop below.
VALIDATORS=(
  "1000:$PROXMOX_R630:Validator"
  "1001:$PROXMOX_R630:Validator"
  "1002:$PROXMOX_R630:Validator"
  "1003:$R630_03:Validator"
  "1004:$R630_03:Validator"
)

for validator in "${VALIDATORS[@]}"; do
  IFS=':' read -r VMID HOST TYPE <<< "$validator"
  clear_node_pool "$VMID" "$HOST" "$TYPE"
done

# ct_listed_on_host HOST VMID — succeeds when `pct list` on HOST has a row
# whose first column is exactly VMID. The pattern is anchored: an unanchored
# grep for "2101" would also match e.g. VMID 12101 or a hostname containing
# those digits.
ct_listed_on_host() {
  local host=$1
  local vmid=$2
  ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${host}" \
    "pct list | grep -q '^${vmid} '" 2>/dev/null
}

# Clear RPC Core (2101) — looked up on ML110 first, then on r630-01.
log_section "Clearing RPC Transaction Pool (2101)"
if ct_listed_on_host "$PROXMOX_ML110" 2101; then
  clear_node_pool 2101 "$PROXMOX_ML110" "RPC"
elif ct_listed_on_host "$PROXMOX_R630" 2101; then
  clear_node_pool 2101 "$PROXMOX_R630" "RPC"
else
  log_warn "RPC node (2101) not found on either host"
fi

# Clear RPC Core Thirdweb admin (2103) — r630-01 per ALL_VMIDS_ENDPOINTS.md
log_section "Clearing RPC Core Thirdweb admin (2103)"
if ct_listed_on_host "$PROXMOX_R630" 2103; then
  clear_node_pool 2103 "$PROXMOX_R630" "RPC Thirdweb Core"
else
  log_warn "RPC Thirdweb Core (2103) not found on ${PROXMOX_R630}"
fi

# 2102 (r630-03) and 2201 (r630-02) are cleared in the peer-tier pct-mount pass when CLEAR_BESU_PEER_TXPOOLS=1
if [[ "${CLEAR_BESU_PEER_TXPOOLS:-0}" != "1" ]]; then
  log_section "Clearing RPC Core 2 (2102)"
  if ct_listed_on_host "$R630_03" 2102; then
    clear_node_pool 2102 "$R630_03" "RPC Core 2"
  else
    log_warn "RPC Core 2 (2102) not found on ${R630_03}"
  fi

  log_section "Clearing RPC Public (2201)"
  if ct_listed_on_host "$R630_02" 2201; then
    clear_node_pool 2201 "$R630_02" "RPC Public"
  else
    log_warn "RPC Public (2201) not found on ${R630_02}"
  fi
else
  log_info "Skipping duplicate 2102/2201 clear_node_pool (already wiped in peer-tier pass)."
fi

log_section "Transaction Pool Clear Complete"

echo "Next steps:"
echo " 1. Wait 30-60 seconds for nodes to fully restart"
echo " 2. Check pending transactions: bash scripts/verify/check-pending-transactions-chain138.sh"
echo " 3. Monitor health: bash scripts/monitoring/monitor-blockchain-health.sh"