Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Co-authored-by: Cursor <cursoragent@cursor.com>
74 lines
2.5 KiB
Bash
Executable File
74 lines
2.5 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Check disk space inside all LXC containers (VMIDs) across Proxmox hosts.
|
|
# Usage: ./scripts/maintenance/check-disk-all-vmids.sh [--csv]   (--csv prints tab-separated rows with a header line)
|
|
# Requires: SSH key-based access to ml110, r630-01, r630-02.
|
|
# See: config/ip-addresses.conf, docs/09-troubleshooting/RPC_NODES_BLOCK_PRODUCTION_FIX.md
|
|
|
|
# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Resolve this script's directory and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optionally load host address overrides; a missing or failing config file is
# deliberately tolerated so the built-in defaults below still apply.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Proxmox host addresses: value from PROXMOX_HOST_* if set, else the default.
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"

# Output mode: "true" once --csv is seen anywhere on the command line.
# Any other argument is ignored.
CSV=false
for arg in "$@"; do
  case "$arg" in
    --csv) CSV=true ;;
  esac
done
|
|
|
|
#######################################
# Run a command on a remote host as root over SSH, best-effort.
# Arguments: $1 - host name or IP address
#            $2 - command string executed by the remote shell
# Outputs:   the remote command's stdout; stderr is discarded
# Returns:   always 0 (failures are swallowed so callers can treat empty
#            output as "unreachable / nothing to report")
#######################################
run_ssh() {
  # BatchMode=yes: never prompt for a password/passphrase. ConnectTimeout only
  # bounds the TCP connect, so without this a failed key auth could block the
  # whole run waiting for interactive input.
  # NOTE(review): StrictHostKeyChecking=no disables host key verification;
  # kept as in the original, but consider a managed known_hosts file instead.
  ssh \
    -o BatchMode=yes \
    -o ConnectTimeout=5 \
    -o StrictHostKeyChecking=no \
    root@"$1" "$2" 2>/dev/null || true
}
|
|
|
|
# Root filesystem usage thresholds, in percent used.
# At or above CRIT_PCT a container is reported as CRIT; at or above WARN_PCT
# (but below CRIT_PCT) it is reported as WARN.
CRIT_PCT=95
WARN_PCT=85
|
|
|
|
#######################################
# Print one result row in the active output mode.
# Globals:   CSV (read) - "true" selects tab-separated output
# Arguments: $1 host name, $2 VMID, $3 size, $4 used, $5 avail,
#            $6 use%, $7 status flag
# Outputs:   one line on stdout
#######################################
_emit_disk_row() {
  if [[ "${CSV:-false}" == "true" ]]; then
    # printf instead of `echo -e` so backslashes in values are never interpreted.
    printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' "$1" "$2" "$3" "$4" "$5" "$6" "$7"
  else
    printf ' %s VMID %s: / %s used (%s free) [%s]\n' "$1" "$2" "$6" "$5" "$7"
  fi
}

#######################################
# Report root filesystem usage for every running container on one host.
# Lists running containers with `pct list`, then runs `df -h /` inside each
# one via `pct exec` and classifies the Use% value against the thresholds.
# Globals:   CSV (read), WARN_PCT (read), CRIT_PCT (read)
# Arguments: $1 - host IP/name passed to run_ssh
#            $2 - host label used in the output
# Outputs:   one row per container on stdout; a notice on stderr when the
#            host returns no running containers (or cannot be reached)
# Returns:   0
#######################################
check_host() {
  local host_ip="$1"
  local host_name="$2"
  # Everything is local so repeated calls do not leak state into the caller.
  local vmids vmid line size used avail pct pct_num flag

  vmids=$(run_ssh "$host_ip" "pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}'" || true)
  if [[ -z "$vmids" ]]; then
    echo "Host $host_name ($host_ip): no running containers or unreachable" >&2
    return 0
  fi

  # Word-splitting of $vmids is intentional: one whitespace-separated ID per word.
  for vmid in $vmids; do
    # The ID is interpolated into a remote shell command below, so accept
    # digits only and skip anything else.
    [[ "$vmid" =~ ^[0-9]+$ ]] || continue

    line=$(run_ssh "$host_ip" "pct exec $vmid -- df -h / 2>/dev/null | awk 'NR==2 {print \$2,\$3,\$4,\$5}'" || true)
    if [[ -z "$line" ]]; then
      # No df output: emit a placeholder row with the same columns as a
      # regular row so CSV consumers always see seven fields.
      _emit_disk_row "$host_name" "$vmid" "?" "?" "?" "?" "UNKNOWN"
      continue
    fi

    size="" used="" avail="" pct=""
    read -r size used avail pct <<< "$line"

    # Strip the trailing percent sign; classify only genuine integers so an
    # unparseable value is reported as UNKNOWN instead of silently "OK".
    pct_num=${pct%\%}
    if [[ ! "$pct_num" =~ ^[0-9]+$ ]]; then
      flag="UNKNOWN"
    elif (( 10#$pct_num >= CRIT_PCT )); then   # 10# forces base 10 (no octal surprises)
      flag="CRIT"
    elif (( 10#$pct_num >= WARN_PCT )); then
      flag="WARN"
    else
      flag="OK"
    fi

    _emit_disk_row "$host_name" "$vmid" "$size" "$used" "$avail" "$pct" "$flag"
  done
}
|
|
|
|
# Report banner: title plus the thresholds in effect.
printf '%s\n' "=== Disk space in all running containers (root /) ==="
printf 'Thresholds: WARN >= %s%%, CRIT >= %s%%\n' "$WARN_PCT" "$CRIT_PCT"
printf '\n'

# Column header, emitted only in tab-separated (--csv) mode.
if "$CSV"; then
  printf 'Host\tVMID\tSize\tUsed\tAvail\tUse%%\tStatus\n'
fi

# Visit each Proxmox host in a fixed order. Entries are "label=address"; the
# label never contains '=', so splitting on the first '=' is unambiguous.
for target in "ml110=$ML110" "r630-01=$R630_01" "r630-02=$R630_02"; do
  check_host "${target#*=}" "${target%%=*}"
done

# Closing hints for the operator.
printf '\n'
printf '%s\n' "Done. For Besu nodes, also ensure /data/besu has space (RocksDB can fill disk)."
printf '%s\n' "See: scripts/storage-monitor.sh (host-level), docs/09-troubleshooting/RPC_NODES_BLOCK_PRODUCTION_FIX.md"
|