Sync workspace: config, docs, scripts, CI, operator rules, and submodule pointers.
- Update dbis_core, cross-chain-pmm-lps, explorer-monorepo, metamask-integration, pr-workspace/chains
- Omit embedded publish git dirs and empty placeholders from index

Made-with: Cursor
This commit is contained in:
@@ -6,15 +6,20 @@ set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
RPC_CORE_1="${RPC_CORE_1:-192.168.11.211}"
|
||||
RPC_URL="${RPC_URL:-http://${RPC_CORE_1}:8545}"
|
||||
DEPLOYER="${DEPLOYER:-0x4A666F96fC8764181194447A7dFdb7d471b301C8}"
|
||||
PROXMOX_USER="${PROXMOX_USER:-root}"
|
||||
PROXMOX_ML110="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"
|
||||
PROXMOX_R630="${PROXMOX_R630:-${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}}"
|
||||
R630_03="${PROXMOX_R630_03:-${PROXMOX_HOST_R630_03:-192.168.11.13}}"
|
||||
BLOCK_SAMPLE_SEC="${BLOCK_PRODUCTION_SAMPLE_SEC:-12}"
|
||||
# Proxmox shell SSH must be root@host — not API-style root@pam from .env (see ip-addresses.conf / clear-all-transaction-pools.sh).
|
||||
PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-root}"
|
||||
[[ "$PROXMOX_SSH_USER" == *"@"* ]] && PROXMOX_SSH_USER="root"
|
||||
PVE_SSH_USER="$PROXMOX_SSH_USER"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
@@ -48,21 +53,21 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check block production
|
||||
# Check block production (longer window reduces false positives after restarts / ~2s block time)
|
||||
log_section "Block Production"
|
||||
BLOCK1=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
sleep 5
|
||||
sleep "$BLOCK_SAMPLE_SEC"
|
||||
BLOCK2=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK1_DEC=$(cast --to-dec "$BLOCK1" 2>/dev/null || echo "0")
|
||||
BLOCK2_DEC=$(cast --to-dec "$BLOCK2" 2>/dev/null || echo "0")
|
||||
BLOCK_DIFF=$((BLOCK2_DEC - BLOCK1_DEC))
|
||||
|
||||
if [ "$BLOCK_DIFF" -gt 0 ]; then
|
||||
log_success "Blocks being produced ($BLOCK_DIFF blocks in 5s)"
|
||||
log_success "Blocks being produced ($BLOCK_DIFF blocks in ${BLOCK_SAMPLE_SEC}s)"
|
||||
else
|
||||
log_error "Block production stalled (no new blocks in 5s)"
|
||||
log_error "Block production stalled (no new blocks in ${BLOCK_SAMPLE_SEC}s)"
|
||||
# If validators are all active, they may still be syncing (QBFT does not produce until sync completes)
|
||||
SYNC_HINT=$(ssh -o ConnectTimeout=3 -o StrictHostKeyChecking=no "${PROXMOX_USER}@${PROXMOX_R630}" \
|
||||
SYNC_HINT=$(ssh -o ConnectTimeout=3 -o StrictHostKeyChecking=no "${PVE_SSH_USER}@${PROXMOX_R630}" \
|
||||
"pct exec 1000 -- journalctl -u besu-validator --no-pager -n 30 2>/dev/null" 2>/dev/null | grep -c "Full sync\|initial sync in progress\|QBFT mining coordinator not starting" || true)
|
||||
if [ "${SYNC_HINT:-0}" -gt 0 ]; then
|
||||
echo " → Validators may be syncing; block production will resume when sync completes (see docs/06-besu/CRITICAL_ISSUE_BLOCK_PRODUCTION_STOPPED.md)."
|
||||
@@ -86,8 +91,10 @@ done
|
||||
|
||||
if [ "$TX_COUNT_TOTAL" -gt 0 ]; then
|
||||
log_success "Transactions being included ($TX_COUNT_TOTAL txs in last 6 blocks)"
|
||||
elif [ "$BLOCK_DIFF" -gt 0 ]; then
|
||||
log_info "Last 6 sampled blocks had no user txs ($EMPTY_BLOCKS empty); chain is advancing (normal on quiet periods)"
|
||||
else
|
||||
log_warn "No transactions in last 6 blocks ($EMPTY_BLOCKS empty blocks)"
|
||||
log_warn "No transactions in last 6 blocks ($EMPTY_BLOCKS empty blocks) and no height gain in sample window"
|
||||
fi
|
||||
|
||||
# Check pending transactions
|
||||
@@ -104,23 +111,53 @@ else
|
||||
log_warn "$PENDING_COUNT pending transactions (nonces $((LATEST_DEC + 1))-$PENDING_DEC)"
|
||||
fi
|
||||
|
||||
# Check validator status
|
||||
# Check validator status (1000–1002 on r630-01; 1003–1004 on r630-03)
|
||||
log_section "Validator Status"
|
||||
VALIDATORS=(
|
||||
"1000:$PROXMOX_R630"
|
||||
"1001:$PROXMOX_R630"
|
||||
"1002:$PROXMOX_R630"
|
||||
"1003:$PROXMOX_ML110"
|
||||
"1004:$PROXMOX_ML110"
|
||||
"1003:$R630_03"
|
||||
"1004:$R630_03"
|
||||
)
|
||||
EXPECTED_VALIDATORS=${#VALIDATORS[@]}
|
||||
|
||||
SKIP_VALIDATOR_SSH=false
|
||||
SSH_R630_01_OK=0
|
||||
SSH_R630_03_OK=0
|
||||
ssh -o ConnectTimeout=4 -o BatchMode=yes -o StrictHostKeyChecking=no "${PVE_SSH_USER}@${PROXMOX_R630}" "true" 2>/dev/null && SSH_R630_01_OK=1
|
||||
ssh -o ConnectTimeout=4 -o BatchMode=yes -o StrictHostKeyChecking=no "${PVE_SSH_USER}@${R630_03}" "true" 2>/dev/null && SSH_R630_03_OK=1
|
||||
if [[ "$SSH_R630_01_OK" -eq 0 ]] && [[ "$SSH_R630_03_OK" -eq 0 ]]; then
|
||||
log_warn "Proxmox SSH unavailable for validator hosts — skipping validator CT checks (run from LAN)"
|
||||
SKIP_VALIDATOR_SSH=true
|
||||
fi
|
||||
|
||||
#######################################
# Report whether the SSH reachability probe for a validator host succeeded.
# Globals (read):
#   PROXMOX_R630, SSH_R630_01_OK - first probed host and its probe flag
#   R630_03, SSH_R630_03_OK      - second probed host and its probe flag
# Arguments:
#   $1 - host address to look up (the HOST part of a VALIDATORS entry)
# Returns:
#   0 if the host matches one of the two probed hosts and its flag is 1;
#   1 otherwise (flag is 0, host is not one of the two, or no host given).
#######################################
host_ssh_ok() {
  # Default to empty so a call without arguments returns 1 instead of
  # aborting the whole script under `set -u`.
  local h="${1:-}"

  if [[ "$h" == "$PROXMOX_R630" && "$SSH_R630_01_OK" -eq 1 ]]; then
    return 0
  fi
  if [[ "$h" == "$R630_03" && "$SSH_R630_03_OK" -eq 1 ]]; then
    return 0
  fi
  return 1
}
|
||||
|
||||
ACTIVE_COUNT=0
|
||||
REACHABLE_EXPECTED=0
|
||||
if [[ "$SKIP_VALIDATOR_SSH" == true ]]; then
|
||||
ACTIVE_COUNT=$EXPECTED_VALIDATORS
|
||||
REACHABLE_EXPECTED=$EXPECTED_VALIDATORS
|
||||
else
|
||||
for validator in "${VALIDATORS[@]}"; do
|
||||
IFS=':' read -r VMID HOST <<< "$validator"
|
||||
SSH_TARGET="${PROXMOX_USER}@${HOST}"
|
||||
if ! host_ssh_ok "$HOST"; then
|
||||
log_warn "Validator $VMID: skipped (SSH to $HOST unavailable)"
|
||||
continue
|
||||
fi
|
||||
REACHABLE_EXPECTED=$((REACHABLE_EXPECTED + 1))
|
||||
SSH_TARGET="${PVE_SSH_USER}@${HOST}"
|
||||
STATUS=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_TARGET" \
|
||||
"pct exec $VMID -- systemctl is-active besu-validator" 2>/dev/null || echo "unknown")
|
||||
|
||||
"pct exec $VMID -- bash -c 'for u in besu-validator besu-validator.service; do s=\$(systemctl is-active \"\$u\" 2>/dev/null || true); [ \"\$s\" = active ] && { echo active; exit 0; }; done; echo inactive'" 2>/dev/null | tr -d '\r' | tail -1) \
|
||||
|| STATUS=""
|
||||
[[ -z "$STATUS" ]] && STATUS=unknown
|
||||
|
||||
if [ "$STATUS" = "active" ]; then
|
||||
ACTIVE_COUNT=$((ACTIVE_COUNT + 1))
|
||||
echo " Validator $VMID: $STATUS"
|
||||
@@ -128,11 +165,16 @@ for validator in "${VALIDATORS[@]}"; do
|
||||
log_warn "Validator $VMID: $STATUS"
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
if [ "$ACTIVE_COUNT" -eq 5 ]; then
|
||||
log_success "All 5 validators active"
|
||||
if [[ "$SKIP_VALIDATOR_SSH" == true ]]; then
|
||||
:
|
||||
elif [[ "$REACHABLE_EXPECTED" -eq 0 ]]; then
|
||||
log_warn "No validator hosts reachable for systemd checks"
|
||||
elif [ "$ACTIVE_COUNT" -eq "$REACHABLE_EXPECTED" ]; then
|
||||
log_success "All $ACTIVE_COUNT/$REACHABLE_EXPECTED reachable validators active"
|
||||
else
|
||||
log_error "Only $ACTIVE_COUNT/5 validators active"
|
||||
log_error "Only $ACTIVE_COUNT/$REACHABLE_EXPECTED reachable validators active"
|
||||
fi
|
||||
|
||||
# Check peer connections
|
||||
@@ -152,7 +194,7 @@ if [ "$BLOCK_DIFF" -eq 0 ]; then
|
||||
log_error "❌ Block production stalled"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
# Hint when validators are syncing (all active but no new blocks yet)
|
||||
if [ "$ACTIVE_COUNT" -eq 5 ]; then
|
||||
if [ "$ACTIVE_COUNT" -eq "$REACHABLE_EXPECTED" ] && [[ "$REACHABLE_EXPECTED" -gt 0 ]]; then
|
||||
echo " (If validators recently restarted: they are likely in full sync; blocks will resume when sync completes.)"
|
||||
fi
|
||||
else
|
||||
@@ -164,13 +206,17 @@ if [ "$TX_COUNT_TOTAL" -eq 0 ] && [ "$PENDING_COUNT" -gt 0 ]; then
|
||||
ISSUES=$((ISSUES + 1))
|
||||
elif [ "$TX_COUNT_TOTAL" -gt 0 ]; then
|
||||
log_success "✓ Transactions being included"
|
||||
elif [ "$BLOCK_DIFF" -gt 0 ]; then
|
||||
log_success "✓ No tx backlog signal (quiet blocks while height advances)"
|
||||
fi
|
||||
|
||||
if [ "$ACTIVE_COUNT" -lt 5 ]; then
|
||||
log_error "❌ Not all validators active"
|
||||
if [[ "$SKIP_VALIDATOR_SSH" == true ]]; then
|
||||
log_warn "⚠ Validator systemd checks skipped (no SSH)"
|
||||
elif [[ "$SKIP_VALIDATOR_SSH" != true ]] && [[ "$REACHABLE_EXPECTED" -gt 0 ]] && [ "$ACTIVE_COUNT" -lt "$REACHABLE_EXPECTED" ]; then
|
||||
log_error "❌ Not all reachable validators active"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
else
|
||||
log_success "✓ All validators active"
|
||||
[[ "$SKIP_VALIDATOR_SSH" != true ]] && [[ "$REACHABLE_EXPECTED" -gt 0 ]] && log_success "✓ All reachable validators active"
|
||||
fi
|
||||
|
||||
if [ "$PENDING_COUNT" -gt 10 ]; then
|
||||
|
||||
@@ -22,10 +22,10 @@ scrape_configs:
|
||||
- '192.168.11.153:9545' # sentry-4 (DHCP assigned)
|
||||
labels:
|
||||
role: 'sentry'
|
||||
# RPC Nodes (VMID 2500-2502)
|
||||
# RPC Nodes (current production fleet)
|
||||
- targets:
|
||||
- '192.168.11.250:9545' # rpc-1 (DHCP assigned)
|
||||
- '192.168.11.251:9545' # rpc-2 (DHCP assigned)
|
||||
- '192.168.11.252:9545' # rpc-3 (DHCP assigned)
|
||||
- '192.168.11.211:9545' # besu-rpc-core-1
|
||||
- '192.168.11.221:9545' # besu-rpc-public-1
|
||||
- '192.168.11.240:9545' # thirdweb-rpc-1
|
||||
labels:
|
||||
role: 'rpc'
|
||||
|
||||
@@ -37,7 +37,7 @@ log_alert() {
|
||||
}
|
||||
|
||||
# Check all containers
|
||||
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
|
||||
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2101 2102 2103 2201 2301 2303 2304 2305 2306 2307 2308 2400 2401 2402 2403 2420 2430 2440 2460 2470 2480; do
|
||||
# Check if container is running
|
||||
if ! pct status "$vmid" 2>/dev/null | grep -q running; then
|
||||
log_alert "Container $vmid is not running"
|
||||
@@ -50,7 +50,7 @@ for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
|
||||
service_name="besu-validator"
|
||||
elif [[ $vmid -ge 1500 ]] && [[ $vmid -le 1503 ]]; then
|
||||
service_name="besu-sentry"
|
||||
elif [[ $vmid -ge 2500 ]] && [[ $vmid -le 2502 ]]; then
|
||||
elif [[ $vmid -ge 2101 ]] && [[ $vmid -le 2480 ]]; then
|
||||
service_name="besu-rpc"
|
||||
fi
|
||||
|
||||
@@ -63,7 +63,7 @@ for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
|
||||
done
|
||||
|
||||
# Check disk space (alert if < 10% free)
|
||||
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
|
||||
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2101 2102 2103 2201 2301 2303 2304 2305 2306 2307 2308 2400 2401 2402 2403 2420 2430 2440 2460 2470 2480; do
|
||||
if pct status "$vmid" 2>/dev/null | grep -q running; then
|
||||
disk_usage=$(pct exec "$vmid" -- df -h / | awk 'NR==2 {print $5}' | sed 's/%//' 2>/dev/null || echo "0")
|
||||
if [[ $disk_usage -gt 90 ]]; then
|
||||
|
||||
Reference in New Issue
Block a user