Files
proxmox/scripts/check-validator-sentry-logs.sh
defiQUG cb47cce074 Complete markdown files cleanup and organization
- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
2026-01-06 01:46:25 -08:00

283 lines
11 KiB
Bash
Executable File
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Check all Validator and Sentry node logs for errors
# Validators: VMIDs 1000-1004
# Sentries: VMIDs 1500-1503
#
# Usage: check-validator-sentry-logs.sh [LOG_LINES]
#   LOG_LINES     number of journal lines to inspect per node (default: 100)
# Environment:
#   PROXMOX_HOST  Proxmox host the script connects to (default: 192.168.11.10)
#   SSH_PASSWORD  root password used for the direct-SSH fallback (no default)
set -euo pipefail

# Colors (expanded later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Proxmox host configuration
PROXMOX_HOST="${PROXMOX_HOST:-192.168.11.10}"
# Credentials must come from the environment, never from the source tree.
# NOTE(review): a real password used to be committed here as the default;
# it should be considered leaked and rotated.
SSH_PASSWORD="${SSH_PASSWORD:-}"

# Node IP mappings (VMID -> container IP)
declare -A NODE_IPS=(
  [1000]="192.168.11.100"
  [1001]="192.168.11.101"
  [1002]="192.168.11.102"
  [1003]="192.168.11.103"
  [1004]="192.168.11.104"
  [1500]="192.168.11.150"
  [1501]="192.168.11.151"
  [1502]="192.168.11.152"
  [1503]="192.168.11.153"
)

# Node definitions
VALIDATORS=(1000 1001 1002 1003 1004)
SENTRIES=(1500 1501 1502 1503)

LOG_LINES="${1:-100}"
# LOG_LINES is pasted into a command line that a remote shell evaluates, so
# anything other than a plain positive integer is rejected up front.
if ! [[ "$LOG_LINES" =~ ^[1-9][0-9]*$ ]]; then
  echo "Usage: ${0##*/} [LOG_LINES] (LOG_LINES must be a positive integer, got: '$LOG_LINES')" >&2
  exit 2
fi
# Make sure sshpass exists; try a one-shot apt install when it does not,
# and give up with manual instructions if that install fails.
if command -v sshpass >/dev/null 2>&1; then
  : # sshpass already on PATH, nothing to do
else
  echo "⚠️ sshpass not installed. Attempting to install..."
  if ! { sudo apt-get update -qq && sudo apt-get install -y sshpass 2>/dev/null; }; then
    echo "❌ Cannot install sshpass automatically"
    echo "Please install manually: sudo apt-get install sshpass"
    exit 1
  fi
fi
# Error patterns to search for.
# The log scan matches these with `grep -i`, so each pattern is listed once:
# case variants ("Error", "ERROR", ...) would hit exactly the same lines and
# make every occurrence be reported and counted several times.
ERROR_PATTERNS=(
  "error"
  "failed"
  "exception"
  "fatal"
  "panic"
  "Unable to read"
  "file not found"
  "configuration"
  "restart"
  "crash"
  "timeout"
  "connection refused"
)
# Opening banner followed by a note on how many log lines are inspected.
printf '%b\n' \
  "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}" \
  "${BLUE}║ CHECKING ALL VALIDATOR AND SENTRY NODE LOGS ║${NC}" \
  "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
printf '\n%s\n\n' "Checking last $LOG_LINES lines of logs for each node"
# Function to check logs for a node
#######################################
# Report the service state of one node and scan its recent journal for errors.
# Access goes through `pct exec` on the Proxmox host first; when the host
# probe fails, it falls back to password SSH straight to the container IP.
# Globals:
#   NODE_IPS, PROXMOX_HOST, SSH_PASSWORD, LOG_LINES, ERROR_PATTERNS (read)
#   RED, GREEN, YELLOW, BLUE, NC (read)
# Arguments:
#   $1 - VMID of the container
#   $2 - systemd unit name without the ".service" suffix
#   $3 - node type shown in the heading (e.g. "Validator")
# Outputs:
#   Heading, service state, the retrieved log lines and an error summary
#   on stdout.
# Returns:
#   0 when logs were retrieved and nothing suspicious was found;
#   1 when the VMID is unmapped, the container is missing or not running,
#     no logs could be read, an error pattern matched, or the restart
#     counter is above 10.
#######################################
check_node_logs() {
  local vmid=$1
  local service_name=$2
  local node_type=$3
  local log_lines="${LOG_LINES:-100}"

  echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}"
  echo -e "${BLUE}Checking ${node_type} VMID $vmid (service: $service_name)${NC}"
  echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}"

  # Get container IP. The ":-" default matters: under `set -u` a plain lookup
  # of an unmapped VMID aborts the whole script before the check below runs.
  local container_ip="${NODE_IPS[$vmid]:-}"
  if [ -z "$container_ip" ]; then
    echo -e "${RED}❌ VMID $vmid: IP address not found in mapping${NC}"
    echo ""
    return 1
  fi

  local -a ssh_common=(-o StrictHostKeyChecking=accept-new)
  local -a host_ssh=(ssh "${ssh_common[@]}" -o ConnectTimeout=5
    -i ~/.ssh/id_ed25519_proxmox "root@${PROXMOX_HOST}")
  local logs=""
  local service_status=""
  local status_output=""
  local status=""

  # Preferred path: via the Proxmox host. A single `pct status` call serves as
  # both the reachability probe and the container-state query.
  if status_output=$(ssh "${ssh_common[@]}" -o ConnectTimeout=3 \
    -i ~/.ssh/id_ed25519_proxmox "root@${PROXMOX_HOST}" \
    "pct status $vmid 2>/dev/null" 2>/dev/null); then
    if [ -z "$status_output" ]; then
      echo -e "${RED}❌ VMID $vmid: Container not found or not accessible${NC}"
      echo ""
      return 1
    fi

    # `pct status` prints "status: <state>"; keep the second field of line 1.
    status=$(awk 'NR == 1 {print $2}' <<<"$status_output")
    status=${status:-unknown}
    if [ "$status" != "running" ]; then
      echo -e "${YELLOW}⚠️ VMID $vmid: Container is not running (status: $status)${NC}"
      echo ""
      return 1
    fi

    # `systemctl is-active` exits non-zero for any state other than "active"
    # but still prints the state, so keep its output instead of appending a
    # second word to it.
    service_status=$("${host_ssh[@]}" \
      "pct exec $vmid -- systemctl is-active $service_name.service 2>/dev/null" || true)
    logs=$("${host_ssh[@]}" \
      "pct exec $vmid -- journalctl -u $service_name.service -n $log_lines --no-pager 2>/dev/null" || true)
  else
    # Fallback: direct SSH to the container
    echo -e "${YELLOW}⚠️ Cannot access via Proxmox host, trying direct SSH to container...${NC}"
    if [ -z "${SSH_PASSWORD:-}" ]; then
      echo -e "${RED}❌ VMID $vmid: SSH_PASSWORD is not set; cannot fall back to direct SSH${NC}"
      echo ""
      return 1
    fi

    # `sshpass -e` takes the password from $SSHPASS, keeping it off the
    # command line where other local users could read it via `ps`.
    local -a direct_ssh=(sshpass -e ssh "${ssh_common[@]}" -o ConnectTimeout=5
      "root@${container_ip}")
    service_status=$(SSHPASS="$SSH_PASSWORD" "${direct_ssh[@]}" \
      "systemctl is-active $service_name.service 2>/dev/null" || true)
    logs=$(SSHPASS="$SSH_PASSWORD" "${direct_ssh[@]}" \
      "journalctl -u $service_name.service -n $log_lines --no-pager 2>/dev/null" || true)
  fi

  # Only the first line is the state; no output at all means it is unknown.
  service_status=${service_status%%$'\n'*}
  service_status=${service_status:-unknown}

  if [ "$service_status" != "active" ]; then
    echo -e "${YELLOW}⚠️ Service $service_name is not active (status: $service_status)${NC}"
  else
    echo -e "${GREEN}✅ Service $service_name is active${NC}"
  fi

  echo ""
  echo "Recent logs (last $log_lines lines):"
  echo "---"
  if [ -z "$logs" ]; then
    echo -e "${YELLOW}⚠️ No logs found for service $service_name${NC}"
    echo ""
    return 1
  fi

  # Display logs
  printf '%s\n' "$logs"
  echo "---"
  echo ""

  # Check for errors
  echo "Checking for errors..."
  local error_found=false
  local error_count=0
  local pattern pattern_key matches match_count
  local -A seen_patterns=()
  for pattern in "${ERROR_PATTERNS[@]}"; do
    # Matching is case-insensitive, so patterns differing only in case hit
    # the same lines; process each one once to avoid inflated counts.
    pattern_key=${pattern,,}
    if [ -n "${seen_patterns["$pattern_key"]:-}" ]; then
      continue
    fi
    seen_patterns["$pattern_key"]=1

    # Routine systemd restart bookkeeping and CORS noise are not errors.
    matches=$(grep -i -- "$pattern" <<<"$logs" \
      | grep -v -e "restart counter" -e "Scheduled restart" -e "CORS Rejected" || true)
    if [ -n "$matches" ]; then
      match_count=$(grep -c '' <<<"$matches")
      error_count=$((error_count + match_count))
      if [ "$error_found" = false ]; then
        error_found=true
        echo -e "${RED}❌ ERRORS FOUND:${NC}"
      fi
      echo -e "${RED} Pattern '$pattern' found $match_count time(s):${NC}"
      # Show at most five sample lines per pattern.
      head -n 5 <<<"$matches" | sed 's/^/ /'
      if [ "$match_count" -gt 5 ]; then
        echo -e "${YELLOW} ... and $((match_count - 5)) more occurrence(s)${NC}"
      fi
    fi
  done

  # Check restart count: read the number from the most recent
  # "restart counter is at N" line, using a bash regex instead of `grep -P`.
  local restart_count=0
  local last_restart_line
  local restart_re='restart counter is at ([0-9]+)'
  last_restart_line=$(grep -i "restart counter" <<<"$logs" | tail -n 1 || true)
  if [[ "$last_restart_line" =~ $restart_re ]]; then
    restart_count=${BASH_REMATCH[1]}
  fi
  if [ "$restart_count" -gt 10 ]; then
    echo -e "${RED}⚠️ High restart count: $restart_count${NC}"
    error_found=true
  elif [ "$restart_count" -gt 0 ]; then
    echo -e "${YELLOW} Restart count: $restart_count${NC}"
  fi

  echo ""
  if [ "$error_found" = false ]; then
    echo -e "${GREEN}✅ No errors found in recent logs${NC}"
    return 0
  else
    echo -e "${RED}❌ Total error occurrences: $error_count${NC}"
    return 1
  fi
}
# Per-group tallies: nodes seen, nodes processed, nodes whose check failed.
total_validators=0
validators_checked=0
validators_with_errors=0
total_sentries=0
sentries_checked=0
sentries_with_errors=0

# --- Validators -------------------------------------------------------------
printf '%b\n' \
  "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}" \
  "${BLUE}║ VALIDATOR NODES (VMIDs 1000-1004) ║${NC}" \
  "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
printf '\n'
for vmid in "${VALIDATORS[@]}"; do
  # A non-zero return from the check marks the node as having errors.
  check_node_logs "$vmid" "besu-validator" "Validator" \
    || validators_with_errors=$((validators_with_errors + 1))
  validators_checked=$((validators_checked + 1))
  total_validators=$((total_validators + 1))
done

# --- Sentries ---------------------------------------------------------------
printf '%b\n' \
  "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}" \
  "${BLUE}║ SENTRY NODES (VMIDs 1500-1503) ║${NC}" \
  "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
printf '\n'
for vmid in "${SENTRIES[@]}"; do
  check_node_logs "$vmid" "besu-sentry" "Sentry" \
    || sentries_with_errors=$((sentries_with_errors + 1))
  sentries_checked=$((sentries_checked + 1))
  total_sentries=$((total_sentries + 1))
done

# --- Final summary ----------------------------------------------------------
printf '%b\n' \
  "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}" \
  "${BLUE}║ SUMMARY ║${NC}" \
  "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
printf '\n'

printf '%s\n' "Validators:" " Total: $total_validators" " Checked: $validators_checked"
if ((validators_with_errors > 0)); then
  printf '%b\n' " Errors: ${RED}❌ Found in $validators_with_errors node(s)${NC}"
else
  printf '%b\n' " Errors: ${GREEN}✅ None found${NC}"
fi
printf '\n'

printf '%s\n' "Sentries:" " Total: $total_sentries" " Checked: $sentries_checked"
if ((sentries_with_errors > 0)); then
  printf '%b\n' " Errors: ${RED}❌ Found in $sentries_with_errors node(s)${NC}"
else
  printf '%b\n' " Errors: ${GREEN}✅ None found${NC}"
fi
printf '\n'

# The exit status mirrors the overall result: 0 only when both groups are clean.
if ((validators_with_errors > 0 || sentries_with_errors > 0)); then
  printf '%b\n' "${RED}❌ Errors found in some nodes. Review logs above.${NC}"
  exit 1
fi
printf '%b\n' "${GREEN}✅ All logs checked - No current errors found!${NC}"
exit 0