Files
proxmox/scripts/maintenance/check-disk-all-vmids.sh
defiQUG bea1903ac9
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Sync all local changes: docs, config, scripts, submodule refs, verification evidence
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 15:46:06 -08:00

74 lines
2.5 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Report root-filesystem usage inside every running LXC container (VMID)
# on the Proxmox hosts ml110, r630-01 and r630-02.
#
# Usage:    ./scripts/maintenance/check-disk-all-vmids.sh [--csv]
# Requires: SSH key-based access to ml110, r630-01, r630-02.
# See also: config/ip-addresses.conf,
#           docs/09-troubleshooting/RPC_NODES_BLOCK_PRODUCTION_FIX.md
set -euo pipefail

# Locate the project root relative to this script (two directories up).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional site configuration; a missing or failing file is tolerated and
# the built-in default addresses below are used instead.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Host addresses: values from the config/environment win over the defaults.
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"

# --csv anywhere on the command line selects tab-separated output;
# every other argument is ignored.
CSV=false
for arg in "$@"; do
  case "$arg" in
    --csv) CSV=true ;;
  esac
done
#######################################
# Run a command on a remote host as root over SSH, best-effort.
# Arguments:
#   $1 - host address to connect to
#   $2 - command string handed to the remote shell
# Outputs:
#   The remote command's stdout; stderr is discarded.
# Returns:
#   0 always. Connection or command failures simply yield empty output, so
#   callers can treat "no output" as "unreachable" without tripping set -e.
#######################################
run_ssh() {
  # BatchMode=yes makes ssh fail immediately instead of prompting for a
  # password/passphrase, so one host without working key auth cannot hang
  # an unattended scan.
  # NOTE(review): StrictHostKeyChecking=no accepts unknown or changed host
  # keys without verification; kept as-is here, consider pinning host keys.
  ssh -o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no \
    root@"$1" "$2" 2>/dev/null || true
}
# Root-filesystem usage thresholds, in percent: a container at or above
# CRIT_PCT is flagged CRIT, otherwise at or above WARN_PCT it is flagged WARN.
CRIT_PCT=95
WARN_PCT=85
#######################################
# Report root-filesystem (/) usage for every running LXC container on one
# Proxmox host.
# Globals:
#   CSV (read)      - "true" selects tab-separated rows, otherwise text
#   WARN_PCT (read) - usage percentage at/above which a row is WARN
#   CRIT_PCT (read) - usage percentage at/above which a row is CRIT
# Arguments:
#   $1 - host address handed to run_ssh
#   $2 - host label used in the output
# Outputs:
#   One line per running container on stdout. In CSV mode each row has the
#   seven fields Host, VMID, Size, Used, Avail, Use%, Status. A notice goes
#   to stderr when the host lists no running containers or is unreachable.
# Returns:
#   0 always; problems are reported in the output, not via exit status.
#######################################
check_host() {
  local host_ip="$1"
  local host_name="$2"
  local vmids vmid line size used avail pct pct_num flag

  vmids=$(run_ssh "$host_ip" "pct list 2>/dev/null | awk 'NR>1 && \$2==\"running\" {print \$1}'" || true)
  if [[ -z "$vmids" ]]; then
    echo "Host $host_name ($host_ip): no running containers or unreachable" >&2
    return 0
  fi

  # Word-splitting is intentional: the remote awk prints one VMID per line.
  for vmid in $vmids; do
    # The VMID is interpolated into a remote command line, so anything that
    # is not purely numeric is skipped rather than executed.
    [[ "$vmid" =~ ^[0-9]+$ ]] || continue

    # df -P forces one line per filesystem; without it some df builds wrap
    # long device names onto a second line and the NR==2 parse breaks.
    line=$(run_ssh "$host_ip" "pct exec $vmid -- df -hP / 2>/dev/null | awk 'NR==2 {print \$2,\$3,\$4,\$5}'" || true)

    # Missing or short df output leaves the affected fields as "?".
    size="" used="" avail="" pct=""
    if [[ -n "$line" ]]; then
      read -r size used avail pct <<< "$line"
    fi
    size="${size:-?}"
    used="${used:-?}"
    avail="${avail:-?}"
    pct="${pct:-?}"

    # Classify only a genuinely numeric percentage; anything else ("?",
    # "-", garbage) is UNKNOWN instead of being silently reported as OK.
    pct_num="${pct%\%}"
    if [[ ! "$pct_num" =~ ^[0-9]+$ ]]; then
      flag="UNKNOWN"
    elif (( 10#$pct_num >= CRIT_PCT )); then
      flag="CRIT"
    elif (( 10#$pct_num >= WARN_PCT )); then
      flag="WARN"
    else
      flag="OK"
    fi

    if [[ "$CSV" == "true" ]]; then
      printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
        "$host_name" "$vmid" "$size" "$used" "$avail" "$pct" "$flag"
    else
      echo " $host_name VMID $vmid: / $pct used ($avail free) [$flag]"
    fi
  done
  return 0
}
# Banner: title, active thresholds, blank separator line.
printf '%s\n' \
  "=== Disk space in all running containers (root /) ===" \
  "Thresholds: WARN >= ${WARN_PCT}%, CRIT >= ${CRIT_PCT}%" \
  ""

# Column header for the tab-separated rows (only with --csv).
if [[ "$CSV" == "true" ]]; then
  printf 'Host\tVMID\tSize\tUsed\tAvail\tUse%%\tStatus\n'
fi

# Scan each host in turn; entries are "label=address".
for target in "ml110=${ML110}" "r630-01=${R630_01}" "r630-02=${R630_02}"; do
  check_host "${target#*=}" "${target%%=*}"
done

# Footer with follow-up pointers.
printf '%s\n' \
  "" \
  "Done. For Besu nodes, also ensure /data/besu has space (RocksDB can fill disk)." \
  "See: scripts/storage-monitor.sh (host-level), docs/09-troubleshooting/RPC_NODES_BLOCK_PRODUCTION_FIX.md"