- Update dbis_core, cross-chain-pmm-lps, explorer-monorepo, metamask-integration, pr-workspace/chains - Omit embedded publish git dirs and empty placeholders from index Made-with: Cursor
309 lines
13 KiB
Bash
Executable File
309 lines
13 KiB
Bash
Executable File
#!/usr/bin/env bash
# Clear transaction pools on all Besu nodes (validators, Core/Thirdweb/public RPC).
#
# Stuck txs often reappear on Core when sentries or Alltra/Hybx RPC CTs (still on the P2P mesh) were not cleared.
# To include those: CLEAR_BESU_PEER_TXPOOLS=1 bash scripts/clear-all-transaction-pools.sh
# Peer tier (1500–1502, 2420/2430/2440/2460/2470/2480) uses pct stop + pct mount on the PVE host (avoids hung systemctl inside flaky CTs).
#
# SSH: uses PROXMOX_SSH_USER (from config/ip-addresses.conf or the environment; defaults to root).
# If .env sets PROXMOX_USER=root@pam for the API, that value is NOT used for SSH
# (see the PROXMOX_USER= assignment below, which overwrites it).
# Afterward: Core RPC (2101) can take several minutes to bind JSON-RPC while RocksDB opens/compacts; public RPC may recover first.

set -euo pipefail

# Load IP configuration. Best-effort: when the file is missing or unreadable the
# hard-coded fallback addresses below are used instead.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Peer-tier Besu CTs (sentries, Alltra/Hybx RPC) retain mempool and can re-gossip txs to Core after a clear.
# Default: skip them (faster). To flush those pools too: CLEAR_BESU_PEER_TXPOOLS=1 bash scripts/clear-all-transaction-pools.sh
CLEAR_BESU_PEER_TXPOOLS="${CLEAR_BESU_PEER_TXPOOLS:-0}"

# Shell SSH must be root@host — not root@pam@host (.env often sets PROXMOX_USER=root@pam for API).
# Any value containing '@' is therefore treated as an API-style user and replaced by plain root.
PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-root}"
if [[ "$PROXMOX_SSH_USER" == *"@"* ]]; then
  PROXMOX_SSH_USER="root"
fi
PROXMOX_USER="$PROXMOX_SSH_USER"

# Proxmox hosts: each accepts its own variable, then the *_HOST_* variant, then a default IP.
PROXMOX_ML110="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"
PROXMOX_R630="${PROXMOX_R630:-${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}}"
R630_03="${PROXMOX_R630_03:-${PROXMOX_HOST_R630_03:-192.168.11.13}}"
R630_02="${PROXMOX_R630_02:-${PROXMOX_HOST_R630_02:-192.168.11.12}}"

# Colors (ANSI escape sequences, stored unexpanded; the log helpers expand them with printf '%b')
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Log helpers. Each takes one argument (the message) and writes a single
# colour-tagged line to stdout. The message is printed with '%s', so backslashes
# in hostnames, paths or remote output are shown verbatim instead of being
# interpreted as escape sequences.
log_info()    { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "$1"; }
log_warn()    { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "$1"; }
log_error()   { printf '%b[✗]%b %s\n' "$RED" "$NC" "$1"; }

# log_section TITLE — blank line, then TITLE framed by two cyan rules, then a blank line.
log_section() {
  local rule='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
  printf '\n%b%s%b\n' "$CYAN" "$rule" "$NC"
  printf '%b%s%b\n' "$CYAN" "$1" "$NC"
  printf '%b%s%b\n\n' "$CYAN" "$rule" "$NC"
}

# Start-of-run banner followed by a blank line.
printf '%s\n\n' "=== Clear Transaction Pools on All Nodes ==="

#######################################
# Clear the Besu transaction pool of one running container via `pct exec`.
#
# Sequence: stop every known Besu unit, delete pool-related files under the
# Besu data directories, start the first Besu unit that starts successfully,
# then report whether any Besu unit is active.
#
# Globals:   PROXMOX_USER (read) — SSH login user on the Proxmox host
# Arguments: $1 - VMID of the container
#            $2 - Proxmox host (address or name) that owns the container
#            $3 - human-readable node type, used only in log messages
# Outputs:   progress via log_info/log_success/log_warn; remote command output
#            (minus lines containing "Configuration file") on stdout
# Returns:   0 always — every remote step is best-effort so one bad node does
#            not abort a script running under `set -e`
#######################################
clear_node_pool() {
  local VMID=$1
  local HOST=$2
  local NODE_TYPE=$3
  local SSH_TARGET="${PROXMOX_USER}@${HOST}"
  local -a ssh_opts=(-o ConnectTimeout=10 -o StrictHostKeyChecking=no)
  local clear_output unit_state

  log_info "Clearing transaction pool for $NODE_TYPE (VMID $VMID on $HOST)..."

  # Stop the service (timeout each stop — Besu can hang on SIGTERM during heavy I/O).
  # Lines containing "Configuration file" are filtered from the relayed output
  # (presumably pct/Proxmox noise — confirm against a live host).
  log_info "  Stopping service..."
  ssh "${ssh_opts[@]}" "$SSH_TARGET" \
    "pct exec $VMID -- bash -c '
      stop_besu() {
        local n=\"\$1\"
        timeout 90 systemctl stop \"\${n}.service\" 2>/dev/null || timeout 90 systemctl stop \"\$n\" 2>/dev/null || true
      }
      stop_besu besu-validator
      stop_besu besu-rpc-core
      stop_besu besu-rpc
      stop_besu besu-sentry
    '" 2>&1 | grep -v "Configuration file" || true

  sleep 2

  # Find and clear transaction pool database.
  # The remote side prints "Cleared: <dir>" for every data directory it actually
  # processed; that marker (not mere non-empty output) is what counts as success,
  # so an SSH/pct failure or a node with no data directory is reported as a warning.
  log_info "  Clearing transaction pool database..."
  clear_output=$(ssh "${ssh_opts[@]}" "$SSH_TARGET" \
    "pct exec $VMID -- bash -c '
      for DATA_DIR in /data/besu /var/lib/besu; do
        if [ -d \"\$DATA_DIR\" ]; then
          # -prune: do not descend into a directory that is about to be removed
          find \"\$DATA_DIR\" -type d -name \"*pool*\" -prune -exec rm -rf {} + 2>/dev/null || true
          find \"\$DATA_DIR\" -type f -name \"*transaction*\" -delete 2>/dev/null || true
          find \"\$DATA_DIR\" -type f -name \"*txpool*\" -delete 2>/dev/null || true
          echo \"Cleared: \$DATA_DIR\"
        fi
      done
    '" 2>&1) || true

  if [[ "$clear_output" == *"Cleared: "* ]]; then
    log_success "  Transaction pool cleared"
  else
    log_warn "  Could not clear transaction pool (may not exist)"
  fi

  # Restart the service (first successful start wins; timeout avoids indefinite hang).
  # Always attempted, even when the clear step failed, because the units were stopped above.
  log_info "  Restarting service..."
  ssh "${ssh_opts[@]}" "$SSH_TARGET" \
    "pct exec $VMID -- bash -c '
      for u in besu-validator besu-rpc-core besu-rpc besu-sentry; do
        timeout 120 systemctl start \"\${u}.service\" 2>/dev/null && break
        timeout 120 systemctl start \"\$u\" 2>/dev/null && break
      done
      true
    '" 2>&1 | grep -v "Configuration file" || true

  sleep 3

  # Verify at least one Besu unit is active (single line — avoids inactive/inactive/active noise).
  unit_state=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_TARGET" \
    "pct exec $VMID -- bash -c '
      for u in besu-validator besu-rpc-core besu-rpc besu-sentry; do
        for s in \"\$u\" \"\${u}.service\"; do
          st=\$(systemctl is-active \"\$s\" 2>/dev/null || true)
          [ \"\$st\" = active ] && { echo active; exit 0; }
        done
      done
      echo inactive
    '" 2>&1 | grep -v "Configuration file" | tr -d '\r' | tail -1) \
    || unit_state="unknown"
  # Without pipefail a failed ssh leaves the value empty rather than tripping the `||`.
  [[ -n "$unit_state" ]] || unit_state="unknown"

  if [[ "$unit_state" == "active" ]]; then
    log_success "  Service restarted and active"
  else
    log_warn "  Service status: $unit_state"
  fi

  echo ""
}

#######################################
# Clear txpool files while the CT is stopped — mount its rootfs from the PVE
# host (reliable for sentries / Alltra RPC where systemctl inside the CT hangs).
#
# The remote script is sent on stdin through a quoted here-document, so nothing
# in it is expanded locally; the VMID travels as the script's first positional
# argument and is validated as numeric before any SSH call is made.
#
# Globals:   PROXMOX_USER (read) — SSH login user on the Proxmox host
# Arguments: $1 - VMID of the container (digits only)
#            $2 - Proxmox host that owns the container
#            $3 - label used in log messages
# Outputs:   "[pve] ..." progress lines from the host, plus a final
#            log_success / log_warn line
# Returns:   0 always (best-effort; failures are logged, not propagated)
#######################################
clear_peer_txpool_via_pct_mount() {
  local VMID=$1
  local HOST=$2
  local LABEL=$3
  local SSH_TARGET="${PROXMOX_USER}@${HOST}"

  # The VMID ends up on a root command line on the PVE host; refuse anything but digits.
  if [[ ! "$VMID" =~ ^[0-9]+$ ]]; then
    log_warn "  Skipping $LABEL — VMID '$VMID' is not numeric"
    echo ""
    return 0
  fi

  log_info "Clearing $LABEL (VMID $VMID on $HOST) via pct stop + rootfs wipe..."
  if ssh -o ConnectTimeout=15 -o StrictHostKeyChecking=no "$SSH_TARGET" bash -s -- "$VMID" <<'REMOTE_SCRIPT'
set +e
VMID=$1
# After pct stop, /var/lib/lxc/<vmid>/rootfs often exists but is empty until pct mount binds the CT disk (LVM/ZFS).
# Using the path without mount caused silent no-ops on r630-03 mesh CTs (no "cleared under" lines).
MP="/var/lib/lxc/${VMID}/rootfs"

# Print a warning, bring the CT back up, and fail — a wipe that cannot proceed
# must never leave the container stopped.
abort_and_restart() {
  echo "  [pve] WARN: $1"
  timeout 120 pct start "$VMID" 2>/dev/null || true
  exit 1
}

ct_is_stopped() {
  pct status "$VMID" 2>/dev/null | grep -qi stopped
}

echo "  [pve] pct stop $VMID (timeout 300s)"
timeout 300 pct stop "$VMID" 2>/dev/null || true
# Poll for up to 90 x 2s in case the stop is still completing.
for _ in $(seq 1 90); do
  if ct_is_stopped; then
    echo "  [pve] CT $VMID stopped"
    break
  fi
  sleep 2
done
if ! ct_is_stopped; then
  abort_and_restart "CT $VMID not stopped after wait — cannot wipe pool safely"
fi
sleep 2

# Drop any stale mount first so the following pct mount starts from a known state.
pct unmount "$VMID" 2>/dev/null || true
if ! pct mount "$VMID" 2>/dev/null; then
  abort_and_restart "pct mount $VMID failed — cannot wipe pool safely"
fi
echo "  [pve] pct mount bound $MP"
if [ ! -d "$MP" ]; then
  pct unmount "$VMID" 2>/dev/null || true
  abort_and_restart "no rootfs at $MP after mount"
fi

cleared=0
for dd in "$MP/data/besu" "$MP/var/lib/besu" "$MP/opt/besu"; do
  if [ -d "$dd" ]; then
    # -prune: do not descend into a directory that is about to be removed
    find "$dd" -type d -name "*pool*" -prune -exec rm -rf {} + 2>/dev/null || true
    find "$dd" -type f -name "*transaction*" -delete 2>/dev/null || true
    find "$dd" -type f -name "*txpool*" -delete 2>/dev/null || true
    echo "  [pve] cleared under $dd"
    cleared=$((cleared + 1))
  fi
done
if [ "$cleared" -eq 0 ]; then
  # Make the no-op visible instead of reporting a clean wipe with nothing done.
  echo "  [pve] WARN: no Besu data directory found under $MP — nothing wiped"
fi

pct unmount "$VMID" 2>/dev/null || true
sleep 2
echo "  [pve] pct start $VMID"
timeout 180 pct start "$VMID" 2>/dev/null || true
sleep 2
exit 0
REMOTE_SCRIPT
  then
    log_success "  Peer CT $VMID — pool wipe via mount complete"
  else
    log_warn "  Peer CT $VMID — mount path had issues; confirm: ssh ${SSH_TARGET} 'pct status $VMID'"
  fi
  echo ""
}

#######################################
# Wipe the pools of a list of peer-tier CTs that live on one Proxmox host.
# Each VMID is first looked up in `pct list` on that host (VMID column anchored
# at line start); present CTs are handed to clear_peer_txpool_via_pct_mount,
# absent ones (or an unreachable host) produce a warning.
#
# Globals:   PROXMOX_USER (read)
# Arguments: $1 - Proxmox host
#            $2 - label for log messages; every "@VMID@" is replaced by the VMID
#            $3 - prefix placed before "VMID <n> not found on <host>" (may be empty)
#            $4.. - VMIDs to process, in order
#######################################
clear_peer_tier() {
  local host=$1
  local label_template=$2
  local missing_prefix=$3
  shift 3
  local vmid
  for vmid in "$@"; do
    if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${host}" \
      "pct list | grep -q '^${vmid} '" 2>/dev/null; then
      clear_peer_txpool_via_pct_mount "$vmid" "$host" "${label_template//@VMID@/$vmid}"
    else
      log_warn "${missing_prefix}VMID $vmid not found on ${host}"
    fi
  done
}

# Peer tier first (when enabled) so validators/RPC are not refilled from sentries mid-run.
if [[ "${CLEAR_BESU_PEER_TXPOOLS:-0}" == "1" ]]; then
  log_section "Clearing Sentry transaction pools (1500–1502) — pct mount on host"
  clear_peer_tier "$PROXMOX_R630" "Sentry" "Sentry " 1500 1501 1502

  log_section "Clearing current edge RPC pools (2420/2430/2440/2460/2470/2480) — pct mount on host"
  clear_peer_tier "$PROXMOX_R630" "Besu RPC (Alltra/Hybx)" "" 2420 2430 2440 2460 2470 2480

  # r630-03: sentries 1503–1508, Core2 2102, Fireblocks 2301/2304, ThirdWeb stack 2400/2402/2403
  # (validators 1003–1004 also live on r630-03 but are handled by the validator pass).
  log_section "Clearing Besu mesh on r630-03 (1503–1508, 2102, 2301, 2304, 2400–2403)"
  clear_peer_tier "$R630_03" "Besu mesh r630-03 VMID @VMID@" "" \
    1503 1504 1505 1506 1507 1508 2102 2301 2304 2400 2402 2403

  # r630-02: public 2201 + named RPC / ThirdWeb helper CTs (same P2P mesh)
  log_section "Clearing Besu mesh on r630-02 (2201, 2303, 2305–2308, 2401)"
  clear_peer_tier "$R630_02" "Besu mesh r630-02 VMID @VMID@" "" \
    2201 2303 2305 2306 2307 2308 2401
else
  log_info "Skipping sentry + Alltra/Hybx pool clear (set CLEAR_BESU_PEER_TXPOOLS=1 if stuck txs reappear on Core after a clear)."
fi

# Clear validators
log_section "Clearing Validator Transaction Pools"

# Validators: 1000–1002 on r630-01; 1003–1004 on r630-03 (see ALL_VMIDS_ENDPOINTS.md).
# Entry format is "VMID:HOST:LABEL"; entries are processed in the order listed.
VALIDATORS=(
  "1000:$PROXMOX_R630:Validator"
  "1001:$PROXMOX_R630:Validator"
  "1002:$PROXMOX_R630:Validator"
  "1003:$R630_03:Validator"
  "1004:$R630_03:Validator"
)

for validator in "${VALIDATORS[@]}"; do
  # A here-string feeds `read`, so the loop itself does not consume the script's stdin.
  IFS=':' read -r validator_vmid validator_host validator_type <<< "$validator"
  clear_node_pool "$validator_vmid" "$validator_host" "$validator_type"
done

# Clear RPC Core (2101) — looked up on ml110 first, then on r630-01.
log_section "Clearing RPC Transaction Pool (2101)"

# The lookup is anchored on the VMID column ('^2101 ') so that another CT whose
# VMID or name merely contains "2101" cannot be mistaken for this one.
if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${PROXMOX_ML110}" \
  "pct list | grep -q '^2101 '" 2>/dev/null; then
  clear_node_pool 2101 "$PROXMOX_ML110" "RPC"
elif ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${PROXMOX_R630}" \
  "pct list | grep -q '^2101 '" 2>/dev/null; then
  clear_node_pool 2101 "$PROXMOX_R630" "RPC"
else
  log_warn "RPC node (2101) not found on either host"
fi

# Clear RPC Core Thirdweb admin (2103) — r630-01 per ALL_VMIDS_ENDPOINTS.md
log_section "Clearing RPC Core Thirdweb admin (2103)"
# Anchored on the VMID column so only the CT whose VMID is exactly 2103 matches.
if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${PROXMOX_R630}" \
  "pct list | grep -q '^2103 '" 2>/dev/null; then
  clear_node_pool 2103 "$PROXMOX_R630" "RPC Thirdweb Core"
else
  log_warn "RPC Thirdweb Core (2103) not found on ${PROXMOX_R630}"
fi

# 2102 (r630-03) and 2201 (r630-02) are cleared in the peer-tier pct-mount pass when CLEAR_BESU_PEER_TXPOOLS=1,
# so they are only handled here (via pct exec) when that pass was skipped.
if [[ "$CLEAR_BESU_PEER_TXPOOLS" != "1" ]]; then
  log_section "Clearing RPC Core 2 (2102)"
  # Lookups are anchored on the VMID column so only an exact VMID match counts.
  if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${R630_03}" \
    "pct list | grep -q '^2102 '" 2>/dev/null; then
    clear_node_pool 2102 "$R630_03" "RPC Core 2"
  else
    log_warn "RPC Core 2 (2102) not found on ${R630_03}"
  fi

  log_section "Clearing RPC Public (2201)"
  if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${R630_02}" \
    "pct list | grep -q '^2201 '" 2>/dev/null; then
    clear_node_pool 2201 "$R630_02" "RPC Public"
  else
    log_warn "RPC Public (2201) not found on ${R630_02}"
  fi
else
  log_info "Skipping duplicate 2102/2201 clear_node_pool (already wiped in peer-tier pass)."
fi

log_section "Transaction Pool Clear Complete"

# Operator follow-up hints (plain stdout, no colour).
printf '%s\n' \
  "Next steps:" \
  "  1. Wait 30-60 seconds for nodes to fully restart" \
  "  2. Check pending transactions: bash scripts/verify/check-pending-transactions-chain138.sh" \
  "  3. Monitor health: bash scripts/monitoring/monitor-blockchain-health.sh"