Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands
- CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround
- CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check
- NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere
- MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates
- LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference

Co-authored-by: Cursor <cursoragent@cursor.com>
141 lines
3.8 KiB
Bash
Executable File
141 lines
3.8 KiB
Bash
Executable File
#!/usr/bin/env bash
# Master Stability Monitor
# Orchestrates all monitoring and recovery operations
#
# Pass --once as the first argument to request a single monitoring pass.

# Strict mode: abort on unhandled errors, unset variables and failed pipeline
# stages.
set -euo pipefail

# Load IP configuration
# NOTE(review): for this lookup PROJECT_ROOT is ONE level above the script
# directory, whereas the value assigned further down is TWO levels above.
# Confirm which layout is intended; only one of them can match the repository.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Best effort: a missing or failing config file is deliberately ignored, so
# variables it would define may still be unset afterwards.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Recompute both paths after sourcing (presumably in case the sourced file
# overwrote them). From here on PROJECT_ROOT is two levels above SCRIPT_DIR.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Load environment
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
  # errexit is switched off while the file is read so that a failing statement
  # inside it cannot abort the monitor; it is switched back on right after.
  set +e
  # shellcheck source=/dev/null
  source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
  set -e
fi

# Colors
# ANSI colour sequences kept with a literal backslash; they are converted to
# real escape characters only when printed (printf %b below).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Each prints one coloured, tagged line to stdout.
# Arguments: $1 - message text (optional; an empty message is printed if absent)
# The message is emitted with %s, so backslashes inside it are printed verbatim
# rather than being interpreted as escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1:-}"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "${1:-}"; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "${1:-}"; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "${1:-}"; }

# Prints a section banner: blank line, rule, title, rule, blank line.
# Arguments: $1 - section title (optional)
log_section() {
  local rule='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
  printf '\n%b%s%b\n' "$CYAN" "$rule" "$NC"
  printf '%b%s%b\n' "$CYAN" "${1:-}" "$NC"
  printf '%b%s%b\n\n' "$CYAN" "$rule" "$NC"
}

# Configuration
# Every setting keeps its previous value as the default but may now be
# overridden by a variable of the same name that is already set (exported by
# the caller or defined by a file sourced earlier in this script).
CHECK_INTERVAL="${CHECK_INTERVAL:-120}" # Seconds between passes (default: every 2 minutes)
AUTO_FIX="${AUTO_FIX:-true}"            # "true" enables the automatic fix after a failed health check
# NOTE(review): AUTO_RESTART is not referenced anywhere else in this file.
AUTO_RESTART="${AUTO_RESTART:-true}"

#######################################
# Runs the validator health-check helper script and reports the outcome.
# Globals:   SCRIPT_DIR (read) - directory containing check-validator-health.sh
# Arguments: none
# Outputs:   section banner, the helper's stdout and stderr (merged onto
#            stdout), then a success or error line
# Returns:   0 if the helper exits 0, 1 otherwise
#######################################
run_health_check() {
  log_section "Running Health Check"

  if ! bash "$SCRIPT_DIR/check-validator-health.sh" 2>&1; then
    log_error "Health check failed"
    return 1
  fi

  log_success "Health check passed"
  return 0
}

#######################################
# Runs the validator auto-fix helper script and reports the outcome.
# Globals:   SCRIPT_DIR (read) - directory containing auto-fix-validator-config.sh
# Arguments: none
# Outputs:   section banner, the helper's stdout and stderr (merged onto
#            stdout), then a success or warning line
# Returns:   0 if the helper exits 0, 1 otherwise
#######################################
run_auto_fix() {
  log_section "Running Auto-Fix"

  if ! bash "$SCRIPT_DIR/auto-fix-validator-config.sh" 2>&1; then
    log_warn "Auto-fix had issues"
    return 1
  fi

  log_success "Auto-fix completed"
  return 0
}

#######################################
# Checks that the chain is producing blocks by sampling the block number twice
# with `cast block-number` and requiring the second sample to be higher.
# Globals:   RPC_URL_138 (read, optional)       - full RPC endpoint URL
#            RPC_CORE_1 (read, optional)        - host used to build
#                                                 http://HOST:8545 when
#                                                 RPC_URL_138 is empty/unset
#            BLOCK_CHECK_DELAY (read, optional) - seconds between the two
#                                                 samples (default 10)
# Arguments: none
# Outputs:   section banner and one success or error line
# Returns:   0 if the block number increased, 1 otherwise (including when no
#            endpoint is configured or a sample could not be read)
#######################################
check_block_production() {
  log_section "Checking Block Production"

  # Resolve the endpoint without tripping `set -u`: the config file that would
  # define RPC_CORE_1 is loaded on a best-effort basis and may be absent.
  local rpc_url="${RPC_URL_138:-}"
  if [[ -z "$rpc_url" ]]; then
    if [[ -z "${RPC_CORE_1:-}" ]]; then
      log_error "Block production check failed: neither RPC_URL_138 nor RPC_CORE_1 is set"
      return 1
    fi
    rpc_url="http://${RPC_CORE_1}:8545"
  fi

  local delay="${BLOCK_CHECK_DELAY:-10}"

  # Declared separately from the assignments so the command status is not
  # masked by `local`; a failed query is recorded as block 0.
  local block1 block2
  block1=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null) || block1=0

  sleep "$delay"

  block2=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null) || block2=0

  # Anything that is not a plain decimal number counts as an unreadable sample.
  [[ "$block1" =~ ^[0-9]+$ ]] || block1=0
  [[ "$block2" =~ ^[0-9]+$ ]] || block2=0

  # Numeric comparison (forced base 10): the height must actually advance; an
  # unreadable second sample (0) or a lower number is reported as stalled.
  if ((10#$block2 > 10#$block1)); then
    log_success "Block production active ($block1 → $block2)"
    return 0
  fi

  log_error "Block production STALLED (block: $block1)"
  return 1
}

#######################################
# Performs one monitoring pass: health check (with an optional automatic fix
# and re-check), block production check, then a summary.
# Globals:   AUTO_FIX (read) - "true" enables the automatic fix attempt
# Arguments: none
# Outputs:   progress and summary lines via the log_* helpers
# Returns:   0 - health check and block production both OK
#            2 - block production check failed (critical)
#            1 - only the health check failed (non-critical)
# The status is returned rather than passed to `exit`, so a caller can decide
# whether to continue; with errexit active, an unguarded call that returns
# non-zero still terminates the script with that same status.
#######################################
main() {
  log_section "Master Stability Monitor"
  log_info "Starting comprehensive stability monitoring..."
  echo ""

  local health_ok=true
  local blocks_ok=true

  # Run health check
  if ! run_health_check; then
    health_ok=false

    # Auto-fix if enabled
    if [ "${AUTO_FIX:-true}" = true ]; then
      log_warn "Attempting automatic fix..."
      # Guarded: run_auto_fix returns 1 when the fix had issues, which under
      # `set -e` would otherwise abort the script before the re-check below.
      run_auto_fix || true

      # Re-check health after giving the fix time to take effect
      sleep 30
      if run_health_check; then
        log_success "Auto-fix resolved issues"
        health_ok=true
      else
        log_error "Auto-fix did not resolve issues"
      fi
    fi
  fi

  # Check block production
  if ! check_block_production; then
    blocks_ok=false
    log_error "CRITICAL: Block production stalled"
  fi

  # Summary
  log_section "Monitoring Summary"

  if [ "$health_ok" = true ] && [ "$blocks_ok" = true ]; then
    log_success "All systems operational"
    return 0
  elif [ "$blocks_ok" = false ]; then
    log_error "CRITICAL: Block production issue detected"
    return 2
  else
    log_warn "Non-critical issues detected"
    return 1
  fi
}

# Entry point.
#   --once : run a single monitoring pass; the script ends with main's status.
#   (else) : run continuously, one pass every CHECK_INTERVAL seconds.
if [ "${1:-}" = "--once" ]; then
  main
else
  while true; do
    # main finishes each pass with a status code (possibly via `exit`).
    # Running it in a guarded subshell keeps a finished or failed pass from
    # terminating the monitor, so the loop really does repeat.
    (main) || log_warn "Monitoring pass finished with status $?"
    sleep "$CHECK_INTERVAL"
  done
fi