- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
283 lines
11 KiB
Bash
Executable File
283 lines
11 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Check all Validator and Sentry node logs for errors.
#   Validators: VMIDs 1000-1004
#   Sentries:   VMIDs 1500-1503
#
# Usage:
#   <this script> [LOG_LINES]
#     LOG_LINES  number of journal lines to inspect per node (default: 100)
#
# Environment:
#   PROXMOX_HOST  Proxmox host used to reach containers (default: 192.168.11.10)
#   SSH_PASSWORD  root password for the direct-SSH fallback (no default)

set -euo pipefail

# Colors (escape sequences, interpreted by `echo -e`)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Proxmox host configuration
PROXMOX_HOST="${PROXMOX_HOST:-192.168.11.10}"
# The password must come from the environment; a credential must never be
# committed as a default value. The variable is still always defined (possibly
# empty) so that references to it do not abort the script under `set -u`.
SSH_PASSWORD="${SSH_PASSWORD:-}"
readonly PROXMOX_HOST SSH_PASSWORD

# Node IP mappings (VMID -> container IP)
declare -Ar NODE_IPS=(
  [1000]="192.168.11.100"
  [1001]="192.168.11.101"
  [1002]="192.168.11.102"
  [1003]="192.168.11.103"
  [1004]="192.168.11.104"
  [1500]="192.168.11.150"
  [1501]="192.168.11.151"
  [1502]="192.168.11.152"
  [1503]="192.168.11.153"
)

# Node definitions
readonly -a VALIDATORS=(1000 1001 1002 1003 1004)
readonly -a SENTRIES=(1500 1501 1502 1503)

# Number of log lines to fetch per node. The value is pasted into a command
# line that is executed remotely as root, so anything but plain digits is
# rejected instead of being passed on.
LOG_LINES="${1:-100}"
if ! [[ "$LOG_LINES" =~ ^[0-9]+$ ]]; then
  echo "❌ LOG_LINES must be a non-negative integer, got: '$LOG_LINES'" >&2
  exit 2
fi
readonly LOG_LINES

# Check if sshpass is available; it is used for the password-based direct-SSH
# fallback. When it is missing, try to install it through apt and stop the
# script if that does not work.
if ! command -v sshpass >/dev/null 2>&1; then
  echo "⚠️ sshpass not installed. Attempting to install..."
  # Explicit if/else instead of `a && b || { ... }` so the failure branch is
  # tied to the install attempt only; diagnostics go to stderr.
  if ! { sudo apt-get update -qq && sudo apt-get install -y sshpass 2>/dev/null; }; then
    echo "❌ Cannot install sshpass automatically" >&2
    echo "Please install manually: sudo apt-get install sshpass" >&2
    exit 1
  fi
fi

# Error patterns to search for.
# check_node_logs matches these with `grep -i`, so each word is listed once:
# listing case variants of the same word (error/Error/ERROR) makes every
# matching line get reported and counted once per variant.
ERROR_PATTERNS=(
  "error"
  "failed"
  "exception"
  "fatal"
  "panic"
  "Unable to read"
  "file not found"
  "configuration"
  "restart"
  "crash"
  "timeout"
  "connection refused"
)

# Report header: title box followed by the number of log lines being inspected.
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ CHECKING ALL VALIDATOR AND SENTRY NODE LOGS ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""
echo "Checking last $LOG_LINES lines of logs for each node"
echo ""

# Function to check logs for a node

# Run one command line on the Proxmox host over key-based SSH.
# Arguments: $1 - remote command line
_proxmox_ssh() {
  ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
    -i ~/.ssh/id_ed25519_proxmox "root@${PROXMOX_HOST}" "$1"
}

# Run one command line inside a container over password-based SSH.
# The password is passed to sshpass through the SSHPASS environment variable
# (-e) instead of -p, which would put it on the command line.
# Arguments: $1 - container IP, $2 - remote command line
_container_ssh() {
  SSHPASS="${SSH_PASSWORD:-}" sshpass -e \
    ssh -o StrictHostKeyChecking=accept-new -o ConnectTimeout=5 \
    "root@$1" "$2"
}

#######################################
# Fetch the recent journal of one node's service, print it, and scan it for
# the configured error patterns and for a high systemd restart counter.
# Globals:
#   NODE_IPS, ERROR_PATTERNS, LOG_LINES, PROXMOX_HOST, SSH_PASSWORD (read)
#   RED, GREEN, YELLOW, BLUE, NC (read)
# Arguments:
#   $1 - VMID of the container
#   $2 - systemd service name without the ".service" suffix
#   $3 - node type label used in the output
# Outputs:
#   Writes the report for the node to stdout.
# Returns:
#   0 when logs were retrieved and nothing suspicious was found;
#   1 when the node is unmapped/unreachable/not running, no logs were
#     returned, an error pattern matched, or the restart counter exceeds 10.
#######################################
check_node_logs() {
  local vmid=$1
  local service_name=$2
  local node_type=$3

  echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}"
  echo -e "${BLUE}Checking ${node_type} VMID $vmid (service: $service_name)${NC}"
  echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}"

  # Get container IP. The ":-" default matters: without it an unmapped VMID
  # is an "unbound variable" error under `set -u` and the check below is
  # never reached.
  local container_ip="${NODE_IPS[$vmid]:-}"
  if [[ -z "$container_ip" ]]; then
    echo -e "${RED}❌ VMID $vmid: IP address not found in mapping${NC}"
    echo ""
    return 1
  fi

  local logs=""
  local service_status=""
  local status_output container_state

  # Preferred method: go through the Proxmox host. The same `pct status` call
  # both probes reachability and provides the container state.
  if status_output=$(_proxmox_ssh "pct status $vmid 2>/dev/null" 2>/dev/null); then
    if [[ -z "$status_output" ]]; then
      echo -e "${RED}❌ VMID $vmid: Container not found or not accessible${NC}"
      echo ""
      return 1
    fi

    # The state is taken from the second field of the status output.
    container_state=$(awk '{print $2}' <<<"$status_output")
    if [[ "$container_state" != "running" ]]; then
      echo -e "${YELLOW}⚠️ VMID $vmid: Container is not running (status: $container_state)${NC}"
      echo ""
      return 1
    fi

    # Check service status. A non-zero exit is expected for any state other
    # than "active", so only the printed state is used.
    service_status=$(_proxmox_ssh \
      "pct exec $vmid -- systemctl is-active $service_name.service 2>/dev/null") || true

    # Get recent logs
    logs=$(_proxmox_ssh \
      "pct exec $vmid -- journalctl -u $service_name.service -n $LOG_LINES --no-pager 2>/dev/null") || true
  else
    # Fallback: Try direct SSH to container
    echo -e "${YELLOW}⚠️ Cannot access via Proxmox host, trying direct SSH to container...${NC}"

    if [[ -z "${SSH_PASSWORD:-}" ]]; then
      echo -e "${RED}❌ VMID $vmid: SSH_PASSWORD is not set, cannot use direct SSH${NC}"
      echo ""
      return 1
    fi

    service_status=$(_container_ssh "$container_ip" \
      "systemctl is-active $service_name.service 2>/dev/null") || true

    logs=$(_container_ssh "$container_ip" \
      "journalctl -u $service_name.service -n $LOG_LINES --no-pager 2>/dev/null") || true
  fi

  # Keep only the first line of the reported state and fall back to
  # "inactive" when nothing was printed (e.g. the connection failed).
  service_status=${service_status%%$'\n'*}
  service_status=${service_status:-inactive}

  if [[ "$service_status" != "active" ]]; then
    echo -e "${YELLOW}⚠️ Service $service_name is not active (status: $service_status)${NC}"
  else
    echo -e "${GREEN}✅ Service $service_name is active${NC}"
  fi

  echo ""
  echo "Recent logs (last $LOG_LINES lines):"
  echo "---"

  if [[ -z "$logs" ]]; then
    echo -e "${YELLOW}⚠️ No logs found for service $service_name${NC}"
    echo ""
    return 1
  fi

  # Display logs
  printf '%s\n' "$logs"
  echo "---"
  echo ""

  # Check for errors
  echo "Checking for errors..."
  local error_found=false
  local error_count=0
  local pattern pattern_key match_count
  local -a match_lines=()
  local -A seen_patterns=()

  # Lines that are known to be benign are dropped once, up front.
  local candidates
  candidates=$(grep -v -e "restart counter" -e "Scheduled restart" \
    -e "CORS Rejected" <<<"$logs" || true)

  for pattern in "${ERROR_PATTERNS[@]}"; do
    # Matching is case-insensitive, so patterns that differ only in case
    # would report and count the same lines again; handle each word once.
    pattern_key=${pattern,,}
    if [[ -n "${seen_patterns[$pattern_key]:-}" ]]; then
      continue
    fi
    seen_patterns[$pattern_key]=1

    mapfile -t match_lines < <(grep -i -- "$pattern" <<<"$candidates" || true)
    match_count=${#match_lines[@]}
    if ((match_count == 0)); then
      continue
    fi

    error_count=$((error_count + match_count))
    if [[ "$error_found" == false ]]; then
      error_found=true
      echo -e "${RED}❌ ERRORS FOUND:${NC}"
    fi
    echo -e "${RED} Pattern '$pattern' found $match_count time(s):${NC}"
    # Show at most the first five matching lines.
    printf ' %s\n' "${match_lines[@]:0:5}"
    if ((match_count > 5)); then
      echo -e "${YELLOW} ... and $((match_count - 5)) more occurrence(s)${NC}"
    fi
  done

  # Check restart count: take the number from the most recent
  # "restart counter is at N" line, if there is one.
  local restart_count=0
  local restart_line
  local restart_re='restart counter is at ([0-9]+)'
  restart_line=$(grep -i "restart counter" <<<"$logs" | tail -n 1) || true
  if [[ "$restart_line" =~ $restart_re ]]; then
    restart_count=${BASH_REMATCH[1]}
  fi
  # 10# forces base 10 so a value with leading zeros is not read as octal.
  if ((10#$restart_count > 10)); then
    echo -e "${RED}⚠️ High restart count: $restart_count${NC}"
    error_found=true
  elif ((10#$restart_count > 0)); then
    echo -e "${YELLOW}ℹ️ Restart count: $restart_count${NC}"
  fi

  echo ""

  if [[ "$error_found" == false ]]; then
    echo -e "${GREEN}✅ No errors found in recent logs${NC}"
    return 0
  fi
  echo -e "${RED}❌ Total error occurrences: $error_count${NC}"
  return 1
}

# Summary tracking
total_validators=0
total_sentries=0
validators_with_errors=0
sentries_with_errors=0
validators_checked=0
sentries_checked=0

# Run check_node_logs for every VMID of one group and tally the results.
# check_node_logs is called as an `if` condition so that a failing node is
# counted instead of terminating the script under `set -e`.
# Arguments: $1 - service name, $2 - node type label, $3... - VMIDs
# Outputs:   sets the globals group_total (nodes processed) and
#            group_errors (nodes for which check_node_logs returned non-zero)
check_node_group() {
  local service_name=$1
  local node_type=$2
  shift 2
  local vmid
  group_total=0
  group_errors=0
  for vmid in "$@"; do
    if ! check_node_logs "$vmid" "$service_name" "$node_type"; then
      group_errors=$((group_errors + 1))
    fi
    group_total=$((group_total + 1))
  done
}

# Check all Validator nodes
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ VALIDATOR NODES (VMIDs 1000-1004) ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""

check_node_group "besu-validator" "Validator" "${VALIDATORS[@]}"
total_validators=$group_total
# Every processed node counts as checked, whether or not it reported errors.
validators_checked=$group_total
validators_with_errors=$group_errors

# Check all Sentry nodes
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ SENTRY NODES (VMIDs 1500-1503) ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""

check_node_group "besu-sentry" "Sentry" "${SENTRIES[@]}"
total_sentries=$group_total
sentries_checked=$group_total
sentries_with_errors=$group_errors

# Final Summary

# Print the summary lines for one node group, followed by a blank line.
# Arguments: $1 - group label, $2 - total nodes, $3 - nodes checked,
#            $4 - nodes with errors
print_group_summary() {
  local label=$1
  local total=$2
  local checked=$3
  local with_errors=$4

  echo "${label}:"
  echo " Total: $total"
  echo " Checked: $checked"
  if [[ "$with_errors" -eq 0 ]]; then
    echo -e " Errors: ${GREEN}✅ None found${NC}"
  else
    echo -e " Errors: ${RED}❌ Found in $with_errors node(s)${NC}"
  fi
  echo ""
}

echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ SUMMARY ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""
print_group_summary "Validators" "$total_validators" "$validators_checked" "$validators_with_errors"
print_group_summary "Sentries" "$total_sentries" "$sentries_checked" "$sentries_with_errors"

# The exit status reflects the overall result: 0 only when no node in either
# group reported errors.
if [[ "$validators_with_errors" -eq 0 && "$sentries_with_errors" -eq 0 ]]; then
  echo -e "${GREEN}✅ All logs checked - No current errors found!${NC}"
  exit 0
else
  echo -e "${RED}❌ Errors found in some nodes. Review logs above.${NC}"
  exit 1
fi