Files
proxmox/scripts/maintenance/proxmox-host-io-optimize-pass.sh
defiQUG e0bb17eff7 ops: oracle publisher LXC 3500/3501, CT migrate docs, Besu/RPC maintenance
- Provision oracle-publisher on CT 3500 (quoted DATA_SOURCE URLs, dotenv).
- Host-side pct-lxc-3501-net-up for ccip-monitor eth0 after migrate.
- CoinGecko key script: avoid sed & corruption; document quoted URLs.
- Besu node list reload, fstrim/RPC scripts, storage health docs.
- Submodule smom-dbis-138: web3 v6 pin, oracle check default host r630-02.

Made-with: Cursor
2026-03-28 15:22:23 -07:00

116 lines
3.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# Additional pass: diagnose I/O + load on Proxmox nodes, then apply safe host-level optimizations.
# - Reports: load, PSI, pvesm, running CT/VM counts, vzdump/backup processes, zpool status, scrub state
# - Applies (idempotent): vm.swappiness on ml110; sysstat; host fstrim where supported
#
# Usage: ./scripts/maintenance/proxmox-host-io-optimize-pass.sh [--diagnose-only]
# Requires: key-based SSH as root to ml110, r630-01, r630-02 (addresses from config/ip-addresses.conf, else built-in defaults)
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Optional site config: a missing or unreadable file is tolerated and the
# built-in addresses below are used instead.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
# Host addresses: PROXMOX_<name> wins, then PROXMOX_HOST_<name>, then the literal default.
ML="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"
R1="${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"
R2="${PROXMOX_R630_02:-${PROXMOX_HOST_R630_02:-192.168.11.12}}"
SSH_OPTS=(-o ConnectTimeout=20 -o ServerAliveInterval=15 -o StrictHostKeyChecking=accept-new)
DIAG_ONLY=false

#######################################
# Parse command-line flags.
# Globals:   DIAG_ONLY (written) - set to "true" by --diagnose-only
# Arguments: the script's own arguments ("$@")
# Outputs:   usage on stdout for -h/--help; error + usage on stderr otherwise
# Returns:   exits 0 after --help, exits 2 on an unrecognised argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --diagnose-only)
        DIAG_ONLY=true
        ;;
      -h | --help)
        printf 'Usage: %s [--diagnose-only]\n' "${0##*/}"
        exit 0
        ;;
      '')
        # An empty argument (e.g. from a wrapper passing "$FLAG") is accepted as "no flag".
        ;;
      *)
        # Refuse to continue: a mistyped --diagnose-only must not fall through
        # to the optimize steps, which change host state.
        printf 'Unknown argument: %s\n' "$arg" >&2
        printf 'Usage: %s [--diagnose-only]\n' "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"
# Run the script supplied on stdin with bash on host $1, logging in as root
# and using the shared SSH_OPTS. The exit status is whatever ssh returns.
remote() {
  local target="root@$1"
  ssh "${SSH_OPTS[@]}" "$target" bash -s
}
# Banner: timestamp, the resolved host addresses and the run mode, then a blank line.
printf '=== Proxmox host I/O optimize pass (%s) ===\n' "$(date -Is)"
printf ' ml110=%s r630-01=%s r630-02=%s diagnose-only=%s\n' "$ML" "$R1" "$R2" "$DIAG_ONLY"
printf '\n'
# Diagnostic pass: print a report for each host (identity, load, pressure
# stall info, storage status, running guests, backup processes, ZFS state).
# The remote script only runs query commands; it changes nothing on the host.
for H in "$ML" "$R1" "$R2"; do
  echo "########## DIAGNOSTIC: $H ##########"
  # Quoted delimiter: the report script is sent verbatim and expanded only on the remote host.
  remote "$H" <<'EOS'
# Best-effort report: a missing tool must not stop the remaining sections.
set +e
hostname
uptime
echo "--- PSI ---"
cat /proc/pressure/cpu 2>/dev/null | head -2
cat /proc/pressure/io 2>/dev/null | head -2
echo "--- pvesm ---"
pvesm status 2>/dev/null | head -25
echo "--- running workloads ---"
echo -n "LXC running: "; pct list 2>/dev/null | awk 'NR>1 && $2=="running"' | wc -l
echo -n "VM running: "; qm list 2>/dev/null | awk 'NR>1 && $3=="running"' | wc -l
echo "--- vzdump ---"
# Capture the matches first: a pipeline's status is that of its last stage
# (head), so a "|| echo" fallback attached to the pipeline can never fire.
# Every alternative uses the [x] bracket form so grep cannot match its own
# command line in the ps listing.
backup_procs=$(ps aux 2>/dev/null | grep -E '[v]zdump|[p]bs-|[p]roxmox-backup' | head -5)
if [ -n "$backup_procs" ]; then
  printf '%s\n' "$backup_procs"
else
  echo "(none visible)"
fi
echo "--- ZFS ---"
# Query zpool once and reuse the text for both the status and the scrub section.
# Empty output (zpool missing or failing) is reported as "no zfs".
zpool_status=$(zpool status 2>/dev/null)
if [ -n "$zpool_status" ]; then
  printf '%s\n' "$zpool_status" | head -20
else
  echo "no zfs"
fi
echo "--- scrub ---"
# "|| true" keeps the remote exit status at 0 when nothing matches, so the
# local "set -e" does not abort the run over an absent scrub line.
printf '%s\n' "$zpool_status" | grep -E 'scan|scrub' || true
EOS
  echo ""
done
# Report-only mode ends here; everything after this point modifies the hosts.
# DIAG_ONLY holds the word "true" or "false" and is executed as that builtin.
if ${DIAG_ONLY}; then
  printf '%s\n' "Diagnose-only: done."
  exit 0
fi
# ml110 only: persist vm.swappiness=10 in a sysctl.d drop-in and apply it.
# Re-running is harmless: when the drop-in already holds the setting, the file
# is left alone and only the live value is re-asserted.
printf '%s\n' "########## OPTIMIZE: ml110 swappiness ##########"
remote "$ML" <<'EOS'
set -e
conf=/etc/sysctl.d/99-proxmox-ml110-swappiness.conf
if grep -q '^vm.swappiness=10$' "$conf" 2>/dev/null; then
  echo "Already vm.swappiness=10 in $conf"
  # Best effort: make sure the running kernel value matches the file.
  sysctl vm.swappiness=10 2>/dev/null || true
else
  # Overwrite the drop-in with a comment line plus the setting, then load it.
  {
    echo '# Prefer RAM over swap when plenty of memory free (operator pass)'
    echo 'vm.swappiness=10'
  } > "$conf"
  sysctl -p "$conf"
  echo "Wrote and applied $conf"
fi
EOS
printf '\n'
# All hosts: install sysstat when sar is absent, flip ENABLED to "true" in
# /etc/default/sysstat, then enable and restart the service. The sed and
# systemctl steps are best-effort and never fail the run.
printf '%s\n' "########## OPTIMIZE: sysstat (all hosts) ##########"
for node in "$ML" "$R1" "$R2"; do
  printf '%s\n' "--- $node ---"
  remote "$node" <<'EOS'
set -e
export DEBIAN_FRONTEND=noninteractive
if ! command -v sar >/dev/null 2>&1; then
  apt-get update -qq && apt-get install -y -qq sysstat
else
  echo "sysstat already present"
fi
sed -i 's/^ENABLED="false"/ENABLED="true"/' /etc/default/sysstat 2>/dev/null || true
systemctl enable sysstat 2>/dev/null || true
systemctl restart sysstat 2>/dev/null || true
# Final check: print the sar path, or "missing" when it is still not installed.
sar_bin=$(command -v sar || echo missing)
echo "sar: $sar_bin"
EOS
done
printf '\n'
# All hosts: run fstrim on / and /var/lib/vz when each is a mountpoint.
# fstrim's own output (including any error text) is printed next to the path;
# failures are tolerated because the remote script runs with "set +e".
printf '%s\n' "########## OPTIMIZE: host fstrim (hypervisor root / and /var/lib/vz if supported) ##########"
for node in "$ML" "$R1" "$R2"; do
  printf '%s\n' "--- $node ---"
  remote "$node" <<'EOS'
set +e
for mnt in / /var/lib/vz; do
  # Skip paths that are not separate mountpoints on this host.
  mountpoint -q "$mnt" 2>/dev/null || continue
  trim_report=$(fstrim -v "$mnt" 2>&1)
  echo "$mnt: $trim_report"
done
EOS
done
printf '\n'
# Closing snapshot: one line per host with its address and the first three
# /proc/loadavg fields, or "unreachable" when the ssh call fails.
printf '%s\n' "########## POST: quick load snapshot ##########"
load_cmd="cat /proc/loadavg | cut -d' ' -f1-3"
for node in "$ML" "$R1" "$R2"; do
  printf '%s ' "$node"
  if ! ssh "${SSH_OPTS[@]}" "root@$node" "$load_cmd" 2>/dev/null; then
    echo "unreachable"
  fi
done
printf '\n'
printf '%s\n' "Done. Optional: run ./scripts/maintenance/fstrim-all-running-ct.sh during a quiet window (can be I/O heavy)."