Files
proxmox/scripts/storage-monitor.sh.bak
defiQUG fbda1b4beb
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
docs: Ledger Live integration, contract deploy learnings, NEXT_STEPS updates
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands
- CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround
- CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check
- NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere
- MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates
- LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-12 15:46:57 -08:00

310 lines
8.9 KiB
Bash
Executable File

#!/usr/bin/env bash
# Proxmox Storage Monitoring Script with Alerts
# Monitors storage usage across all Proxmox nodes and sends alerts
#
# Optional environment overrides (the default is used when unset or empty):
#   STORAGE_MONITOR_LOG_DIR     log directory
#                               (default: <project root>/logs/storage-monitoring)
#   STORAGE_WARNING_THRESHOLD   storage usage warning level, percent (default: 80)
#   STORAGE_CRITICAL_THRESHOLD  storage usage critical level, percent (default: 90)
#   STORAGE_VG_FREE_WARNING     volume group free-space warning level, GB (default: 10)
#   STORAGE_VG_FREE_CRITICAL    volume group free-space critical level, GB (default: 5)
set -euo pipefail

# The project root is the parent of the directory that holds this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Log files are named per day, so each day gets its own alert/status log.
LOG_DIR="${STORAGE_MONITOR_LOG_DIR:-${PROJECT_ROOT}/logs/storage-monitoring}"
ALERT_LOG="${LOG_DIR}/storage_alerts_$(date +%Y%m%d).log"
STATUS_LOG="${LOG_DIR}/storage_status_$(date +%Y%m%d).log"

# Alert thresholds
WARNING_THRESHOLD="${STORAGE_WARNING_THRESHOLD:-80}"
CRITICAL_THRESHOLD="${STORAGE_CRITICAL_THRESHOLD:-90}"
VG_FREE_WARNING="${STORAGE_VG_FREE_WARNING:-10}" # GB
VG_FREE_CRITICAL="${STORAGE_VG_FREE_CRITICAL:-5}" # GB

# The thresholds are compared with `[ -ge ]` / `[ -le ]`, which only accept
# integers, so reject anything else up front instead of failing mid-run.
for _threshold_name in WARNING_THRESHOLD CRITICAL_THRESHOLD VG_FREE_WARNING VG_FREE_CRITICAL; do
  if ! [[ "${!_threshold_name}" =~ ^[0-9]+$ ]]; then
    echo "Invalid ${_threshold_name}: '${!_threshold_name}' (expected a non-negative integer)" >&2
    exit 2
  fi
done
unset _threshold_name

# Colors (backslash escapes are expanded later by `echo -e` in the log helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'
# Console logging helpers: each prints its first argument on stdout behind a
# coloured tag.  _log_tagged <colour> <tag> <message> does the shared
# formatting; `echo -e` expands the backslash escapes in the colour codes
# (and any that occur in the message itself).
_log_tagged() { echo -e "${1}[${2}]${NC} ${3}"; }
log_info() { _log_tagged "$BLUE" "INFO" "$1"; }
log_success() { _log_tagged "$GREEN" "✓" "$1"; }
log_warn() { _log_tagged "$YELLOW" "⚠" "$1"; }
log_error() { _log_tagged "$RED" "✗" "$1"; }
log_alert() { _log_tagged "$RED" "ALERT" "$1"; }
# Create log directory
mkdir -p "$LOG_DIR"

# Proxmox nodes configuration: NODES[<hostname>]="<ip>:<password>".
# Consumers split each entry on the FIRST ':' (ip = text before it,
# password = everything after it), so a password may itself contain ':'.
# SECURITY(review): these are clear-text root passwords committed with the
# script. Prefer SSH key authentication, or load the secrets from a file or
# environment that is not under version control, and rotate these values.
declare -A NODES
NODES[ml110]="192.168.11.10:L@kers2010"
NODES[r630-01]="192.168.11.11:password"
NODES[r630-02]="192.168.11.12:password"
NODES[r630-03]="192.168.11.13:L@kers2010"
NODES[r630-04]="192.168.11.14:L@kers2010"

# Alert tracking: one human-readable "CRITICAL: ..." / "WARNING: ..." string
# per finding. Initialised to an empty array (not merely declared) so that
# ${#ALERTS[@]} and "${ALERTS[@]}" are well-defined under `set -u`.
declare -a ALERTS=()
# SSH helper function
#
# Runs a command as root on a configured node and prints its stdout.
# Globals:   NODES (read) - entry format "<ip>:<password>"
# Arguments: $1 - node hostname (key into NODES); remaining args - remote command
# Outputs:   remote stdout; stderr is discarded. If the ssh invocation fails,
#            an empty line is printed instead.
# Returns:   always 0 (failures are reported only through the empty output)
ssh_node() {
  local hostname="$1"
  shift
  local entry="${NODES[$hostname]}"
  local ip="${entry%%:*}"      # text before the first ':'
  local password="${entry#*:}" # everything after the first ':'
  # NOTE(review): StrictHostKeyChecking=no disables host key verification;
  # kept as in the original, but consider a managed known_hosts file.
  local -a ssh_cmd=(ssh -o StrictHostKeyChecking=no -o ConnectTimeout=5 "root@${ip}" "$@")
  if command -v sshpass >/dev/null 2>&1; then
    # Hand the password over via the SSHPASS environment variable (-e) rather
    # than -p, so it does not appear in the process list (ps / /proc/*/cmdline).
    SSHPASS="$password" sshpass -e "${ssh_cmd[@]}" 2>/dev/null || echo ""
  else
    # Without sshpass the stored password is not used at all; this relies on
    # whatever authentication plain ssh can perform (e.g. keys).
    "${ssh_cmd[@]}" 2>/dev/null || echo ""
  fi
}
# Check node connectivity
#
# Sends a single ping (2 second wait) to the IP configured for node $1.
# Globals:   NODES (read)
# Arguments: $1 - node hostname (key into NODES)
# Outputs:   nothing (ping output is discarded)
# Returns:   ping's exit status
check_node() {
  local node_entry="${NODES[$1]}"
  local addr="${node_entry%%:*}" # IP is the text before the first ':'
  ping -c 1 -W 2 "$addr" &>/dev/null
}
# Parse storage usage percentage
#
# Arguments: $1 - percentage text such as "45.32%"
# Outputs:   the value truncated to an integer (e.g. 45); text that does not
#            start with a number yields 0
parse_storage_percent() {
  # sub() removes the first '%' of the line; int() truncates the first field.
  echo "$1" | awk '{ sub(/%/, ""); print int($1) }'
}
# Check storage usage
#
# Evaluates one whitespace-separated storage row against the usage thresholds.
# Field 1 is used as the storage name, field 3 as its status, field 6 as the
# available space and field 7 as the usage percentage.
# Globals:   WARNING_THRESHOLD, CRITICAL_THRESHOLD (read); ALERTS (appended)
# Arguments: $1 - node hostname, $2 - one storage row
# Outputs:   a log line through log_alert / log_warn when a threshold is hit
# Returns:   2 on critical, 1 on warning, 0 otherwise (including skipped rows)
check_storage_usage() {
  local hostname="$1"
  local storage_line="$2"
  local st_name st_state st_avail pct_field pct severity

  # Split the row once; the unused columns (2, 4, 5 and any extras) go to "_".
  IFS=$' \t\n' read -r st_name _ st_state _ _ st_avail pct_field _ <<<"$storage_line" || true

  # Rows for disabled/inactive storage, or without a percentage, are skipped.
  case "$st_state" in
    disabled | inactive) return 0 ;;
  esac
  if [ "$pct_field" = "N/A" ]; then
    return 0
  fi

  # A parser failure is tolerated; an empty or zero result skips the row.
  pct=$(parse_storage_percent "$pct_field") || true
  if [ -z "$pct" ] || [ "$pct" -eq 0 ]; then
    return 0
  fi

  # Classify against the thresholds, most severe first.
  if [ "$pct" -ge "$CRITICAL_THRESHOLD" ]; then
    severity="CRITICAL"
  elif [ "$pct" -ge "$WARNING_THRESHOLD" ]; then
    severity="WARNING"
  else
    return 0
  fi

  ALERTS+=("${severity}: $hostname:$st_name is at ${pct}% capacity (${st_avail} available)")
  if [ "$severity" = "CRITICAL" ]; then
    log_alert "CRITICAL: $hostname:$st_name is at ${pct}% capacity"
    return 2
  fi
  log_warn "WARNING: $hostname:$st_name is at ${pct}% capacity"
  return 1
}
# Check volume group free space
#
# Evaluates one row of "<vg_name> <vg_size> <vg_free>" (sizes carrying a unit
# suffix such as "g") against the free-space thresholds.
# Globals:   VG_FREE_WARNING, VG_FREE_CRITICAL (read); ALERTS (appended)
# Arguments: $1 - node hostname, $2 - one volume group row
# Outputs:   a log line through log_alert / log_warn when a threshold is hit
# Returns:   2 on critical, 1 on warning, 0 otherwise (including skipped rows)
check_vg_free_space() {
  local hostname="$1"
  local vg_line="$2"
  local vg_name vg_free free_gb

  # Leading whitespace is trimmed by read; the size column and extras go to "_".
  IFS=$' \t\n' read -r vg_name _ vg_free _ <<<"$vg_line" || true

  # LVM marks rounded sizes with a leading '<' (e.g. "<4.00g"). Strip any
  # non-numeric prefix first; otherwise int() yields 0 and the row would be
  # silently skipped. int() then stops at the unit suffix and truncates to
  # whole GB.
  free_gb=$(awk '{ sub(/^[^0-9.]+/, ""); print int($0) }' <<<"$vg_free") || true

  # NOTE(review): rows that truncate to 0 GB are skipped, as before. This
  # presumably avoids alerts for fully allocated volume groups, but it also
  # hides a group with less than 1 GB free; confirm that this is intended.
  if [ -z "$free_gb" ] || [ "$free_gb" -eq 0 ]; then
    return 0
  fi

  if [ "$free_gb" -le "$VG_FREE_CRITICAL" ]; then
    ALERTS+=("CRITICAL: $hostname:$vg_name volume group has only ${free_gb}GB free space")
    log_alert "CRITICAL: $hostname:$vg_name VG has only ${free_gb}GB free"
    return 2
  elif [ "$free_gb" -le "$VG_FREE_WARNING" ]; then
    ALERTS+=("WARNING: $hostname:$vg_name volume group has only ${free_gb}GB free space")
    log_warn "WARNING: $hostname:$vg_name VG has only ${free_gb}GB free"
    return 1
  fi
  return 0
}
# Monitor a single node
#
# Collects storage and volume group information from one node, runs the
# threshold checks on every row and appends the raw output to the status log.
# Globals:   STATUS_LOG (appended to); ALERTS (appended by the check functions)
# Arguments: $1 - node hostname (key into NODES)
# Outputs:   progress and warning lines through log_info / log_warn
# Returns:   1 if the node is unreachable or returns no storage status,
#            0 otherwise (threshold hits are recorded in ALERTS, not returned)
monitor_node() {
  local hostname="$1"
  local storage_status vgs_info line

  if ! check_node "$hostname"; then
    log_warn "$hostname is not reachable"
    return 1
  fi
  log_info "Monitoring $hostname..."

  # Get storage status
  storage_status=$(ssh_node "$hostname" 'pvesm status 2>/dev/null' || echo "")
  if [ -z "$storage_status" ]; then
    log_warn "Could not get storage status from $hostname"
    return 1
  fi

  # Process each storage line (skip header).
  # The loop is fed by redirection instead of a pipe: a piped loop runs in a
  # subshell, and every ALERTS+= made by the checks would be lost.
  {
    IFS= read -r line || true # discard the header row
    while IFS= read -r line; do
      [ -n "$line" ] || continue
      # The checks return 1/2 to signal warning/critical; that is a finding,
      # not a failure, so it must not trip `set -e`.
      check_storage_usage "$hostname" "$line" || true
    done
  } <<<"$storage_status"

  # Check volume groups
  vgs_info=$(ssh_node "$hostname" 'vgs --units g --noheadings -o vg_name,vg_size,vg_free 2>/dev/null' || echo "")
  if [ -n "$vgs_info" ]; then
    while IFS= read -r line; do
      [ -n "$line" ] || continue
      check_vg_free_space "$hostname" "$line" || true
    done <<<"$vgs_info"
  fi

  # Log storage status
  {
    echo "=== $hostname Storage Status $(date) ==="
    echo "$storage_status"
    echo ""
    echo "=== Volume Groups ==="
    echo "$vgs_info"
    echo ""
  } >> "$STATUS_LOG"
  return 0
}
# Send alerts (can be extended to email, Slack, etc.)
#
# Reports the collected alerts: appends a dated section to the alert log and
# prints every alert on stdout.
# Globals:   ALERTS (read); ALERT_LOG (appended to)
# Outputs:   a status line through log_success / log_warn, then one alert per line
# Returns:   0
send_alerts() {
  local alert_count="${#ALERTS[@]}"

  if ((alert_count == 0)); then
    log_success "No storage alerts"
    return 0
  fi
  log_warn "Found ${alert_count} storage alert(s)"

  # Log section: header line, one alert per line, trailing blank line.
  {
    printf '=== Storage Alerts %s ===\n' "$(date)"
    printf '%s\n' "${ALERTS[@]}"
    printf '\n'
  } >> "$ALERT_LOG"

  # Print alerts
  printf '%s\n' "${ALERTS[@]}"

  # TODO: Add email/Slack/webhook notifications here
  # Example:
  # send_email "Storage Alerts" "$(printf '%s\n' "${ALERTS[@]}")"
  # send_slack_webhook "${ALERTS[@]}"
}
# Generate summary report
#
# Writes (overwriting) the summary file for the current day: node
# reachability, the collected alerts and the active thresholds.
# Globals:   LOG_DIR, NODES, ALERTS, WARNING_THRESHOLD, CRITICAL_THRESHOLD,
#            VG_FREE_WARNING, VG_FREE_CRITICAL (all read)
# Outputs:   the path of the summary file through log_info
generate_summary() {
  local summary_file="${LOG_DIR}/storage_summary_$(date +%Y%m%d).txt"
  local node

  {
    printf '=== Proxmox Storage Summary %s ===\n\n' "$(date)"

    # Reachability is probed again here, one check_node call per node.
    printf 'Nodes Monitored:\n'
    for node in "${!NODES[@]}"; do
      if check_node "$node"; then
        printf '%s\n' "$node"
      else
        printf '%s (not reachable)\n' "$node"
      fi
    done

    printf '\nAlerts: %s\n' "${#ALERTS[@]}"
    if ((${#ALERTS[@]} > 0)); then
      printf '\n'
      printf ' - %s\n' "${ALERTS[@]}"
    fi

    printf '\nThresholds:\n'
    printf ' Storage Usage Warning: %s%%\n' "$WARNING_THRESHOLD"
    printf ' Storage Usage Critical: %s%%\n' "$CRITICAL_THRESHOLD"
    printf ' Volume Group Free Warning: %sGB\n' "$VG_FREE_WARNING"
    printf ' Volume Group Free Critical: %sGB\n' "$VG_FREE_CRITICAL"
  } > "$summary_file"

  log_info "Summary saved to: $summary_file"
}
# Main monitoring function
#
# Dispatches on the requested mode.
# Globals:   NODES, LOG_DIR, ALERT_LOG (read)
# Arguments: $1 - mode: "check" (default), "status" or "alerts"
# Outputs:   human-readable report on stdout
# Returns:   exits with status 1 (after printing usage) on an unknown mode
main() {
  local mode="${1:-check}"
  local hostname status_output
  case "$mode" in
    check)
      echo "=== Proxmox Storage Monitoring ==="
      echo "Date: $(date)"
      echo ""
      # Monitor all nodes
      for hostname in "${!NODES[@]}"; do
        # monitor_node returns non-zero for an unreachable node (it has
        # already logged why); under `set -e` a bare call would abort the
        # whole run and skip the remaining nodes, the alerts and the summary.
        monitor_node "$hostname" || true
      done
      # Send alerts
      send_alerts
      # Generate summary
      generate_summary
      echo ""
      log_info "Monitoring complete. Check logs in: $LOG_DIR"
      ;;
    status)
      # Show current status
      echo "=== Current Storage Status ==="
      for hostname in "${!NODES[@]}"; do
        if check_node "$hostname"; then
          echo ""
          echo "--- $hostname ---"
          # ssh_node reports failure as empty output with status 0, so the
          # fallback message has to be keyed on the output, not the status.
          status_output=$(ssh_node "$hostname" 'pvesm status 2>/dev/null' || true)
          if [ -n "$status_output" ]; then
            printf '%s\n' "$status_output"
          else
            echo "Could not get status"
          fi
        fi
      done
      ;;
    alerts)
      # Show recent alerts
      if [ -f "$ALERT_LOG" ]; then
        tail -50 "$ALERT_LOG"
      else
        echo "No alerts found"
      fi
      ;;
    *)
      echo "Usage: $0 [check|status|alerts]"
      echo " check - Run full monitoring check (default)"
      echo " status - Show current storage status"
      echo " alerts - Show recent alerts"
      exit 1
      ;;
  esac
}
# Run main function: forward all command-line arguments unchanged; main
# falls back to the "check" mode when no argument is given.
main "$@"