Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands - CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround - CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check - NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere - MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates - LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference Co-authored-by: Cursor <cursoragent@cursor.com>
310 lines
8.9 KiB
Bash
Executable File
310 lines
8.9 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Proxmox Storage Monitoring Script with Alerts
#
# Monitors storage usage across all Proxmox nodes and sends alerts.
# Log files are written below <project root>/logs/storage-monitoring,
# with the current date (YYYYMMDD) embedded in each file name.

# Strict mode: abort on unhandled errors, on unset variables, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Alert thresholds
VG_FREE_CRITICAL=5   # GB
VG_FREE_WARNING=10   # GB
CRITICAL_THRESHOLD=90
WARNING_THRESHOLD=80

# Locations: the project root is the parent of the directory holding this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
LOG_DIR="${PROJECT_ROOT}/logs/storage-monitoring"
STATUS_LOG="${LOG_DIR}/storage_status_$(date +%Y%m%d).log"
ALERT_LOG="${LOG_DIR}/storage_alerts_$(date +%Y%m%d).log"

# Colors: ANSI escape sequences, kept as literal backslash text and expanded
# at print time by the log_* helpers.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers. Each prints one line to stdout: a coloured tag followed by
# the message in $1. Backslash escapes in the message are expanded as well.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

log_success() {
  printf '%b\n' "${GREEN}[✓]${NC} $1"
}

log_warn() {
  printf '%b\n' "${YELLOW}[⚠]${NC} $1"
}

log_error() {
  printf '%b\n' "${RED}[✗]${NC} $1"
}

log_alert() {
  printf '%b\n' "${RED}[ALERT]${NC} $1"
}

# Create log directory
mkdir -p "$LOG_DIR"

# Proxmox nodes configuration
#
# Each entry maps a node name to "<ip>:<root password>". Consumers split the
# value on the FIRST colon, so a password may itself contain colons.
#
# SECURITY: the default passwords below are committed in clear text. They are
# kept only so existing deployments keep working. Prefer supplying each
# password through the matching PROXMOX_PASS_* environment variable (or move
# to SSH key authentication) and rotate any password that has been committed.
declare -A NODES
NODES[ml110]="192.168.11.10:${PROXMOX_PASS_ML110:-L@kers2010}"
NODES[r630-01]="192.168.11.11:${PROXMOX_PASS_R630_01:-password}"
NODES[r630-02]="192.168.11.12:${PROXMOX_PASS_R630_02:-password}"
NODES[r630-03]="192.168.11.13:${PROXMOX_PASS_R630_03:-L@kers2010}"
NODES[r630-04]="192.168.11.14:${PROXMOX_PASS_R630_04:-L@kers2010}"

# Alert tracking: messages appended by the check functions during a run.
# Initialised explicitly so the array is set (and empty) from the start.
declare -a ALERTS=()

#######################################
# SSH helper: run a command as root on a configured node.
# Globals:   NODES (read) - "<ip>:<password>" per node name
# Arguments: $1 - node name (key of NODES); remaining args - remote command
# Outputs:   the remote command's stdout; stderr of ssh is discarded.
#            When ssh fails, a single empty line is printed instead.
# Returns:   0 even when ssh fails (callers detect failure by empty output);
#            1 if the node name is empty or not present in NODES.
#######################################
ssh_node() {
  local hostname="$1"
  shift

  # Fail with a message instead of tripping `set -u` on an unknown key.
  if [ -z "$hostname" ] || [ -z "${NODES[$hostname]:-}" ]; then
    printf 'ssh_node: unknown node: %s\n' "$hostname" >&2
    return 1
  fi

  local entry="${NODES[$hostname]}"
  local ip="${entry%%:*}"       # text before the first colon
  local password="${entry#*:}"  # everything after the first colon
  # Host-key verification is disabled and the connect timeout is 5 seconds.
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=5)

  if command -v sshpass >/dev/null 2>&1; then
    # Hand the password over through the SSHPASS environment variable
    # (sshpass -e) rather than -p, so it never appears in the process
    # argument list.
    SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" "$@" 2>/dev/null || echo ""
  else
    # No sshpass available: plain ssh, which relies on non-password auth.
    ssh "${ssh_opts[@]}" "root@${ip}" "$@" 2>/dev/null || echo ""
  fi
}

#######################################
# Check node connectivity with a single ICMP echo request.
# Globals:   NODES (read)
# Arguments: $1 - node name (key of NODES)
# Returns:   the exit status of ping (0 when the node answered).
#######################################
check_node() {
  local node_name="$1"
  local node_entry="${NODES[$node_name]}"

  # The address is the text before the first colon of the entry.
  # One probe (-c 1), with -W 2 as the reply timeout.
  ping -c 1 -W 2 "${node_entry%%:*}" >/dev/null 2>&1
}

#######################################
# Parse storage usage percentage.
# Arguments: $1 - percentage text such as "45.32%"
# Outputs:   the value with the first '%' removed, truncated to an integer;
#            text that does not start with a number yields 0.
#######################################
parse_storage_percent() {
  local raw_value="$1"

  # A single awk call both drops the first '%' and truncates to an integer.
  echo "$raw_value" | awk '{ sub(/%/, ""); print int($1) }'
}

#######################################
# Check one storage line against the usage thresholds.
# Globals:   WARNING_THRESHOLD, CRITICAL_THRESHOLD (read); ALERTS (appended)
# Arguments: $1 - node name (used in messages)
#            $2 - one whitespace-separated status line; fields used are
#                 1 = name, 3 = status, 6 = available, 7 = usage percent
# Outputs:   a log line via log_alert / log_warn when a threshold is hit
# Returns:   0 = fine or skipped, 1 = warning, 2 = critical.
#            Callers running under `set -e` must guard the call
#            (e.g. `|| true`), because 1 and 2 are non-zero.
#######################################
check_storage_usage() {
  local hostname="$1"
  local storage_line="$2"
  local storage_name status available percent_str percent
  local _type _total _used _rest

  # Split the line once with read instead of spawning awk per field.
  IFS=$' \t' read -r storage_name _type status _total _used available percent_str _rest \
    <<< "$storage_line" || true

  # Skip if disabled or inactive, or when no percentage is reported.
  if [ "$status" = "disabled" ] || [ "$status" = "inactive" ] || [ "$percent_str" = "N/A" ]; then
    return 0
  fi

  # Declared separately from the assignment so a failure is not masked by `local`.
  percent="$(parse_storage_percent "$percent_str")"

  # Nothing to report for an empty or zero percentage.
  if [ -z "$percent" ] || [ "$percent" -eq 0 ]; then
    return 0
  fi

  # Check thresholds, most severe first.
  if [ "$percent" -ge "$CRITICAL_THRESHOLD" ]; then
    ALERTS+=("CRITICAL: $hostname:$storage_name is at ${percent}% capacity (${available} available)")
    log_alert "CRITICAL: $hostname:$storage_name is at ${percent}% capacity"
    return 2
  elif [ "$percent" -ge "$WARNING_THRESHOLD" ]; then
    ALERTS+=("WARNING: $hostname:$storage_name is at ${percent}% capacity (${available} available)")
    log_warn "WARNING: $hostname:$storage_name is at ${percent}% capacity"
    return 1
  fi

  return 0
}

#######################################
# Check volume group free space against the free-space thresholds.
# Globals:   VG_FREE_WARNING, VG_FREE_CRITICAL (read, in GB); ALERTS (appended)
# Arguments: $1 - node name (used in messages)
#            $2 - one whitespace-separated line: name, size, free
#                 (free is expected in gigabytes, e.g. "16.00g" or "<4.50g")
# Outputs:   a log line via log_alert / log_warn when a threshold is hit
# Returns:   0 = fine or skipped, 1 = warning, 2 = critical.
#            Callers running under `set -e` must guard the call
#            (e.g. `|| true`), because 1 and 2 are non-zero.
#######################################
check_vg_free_space() {
  local hostname="$1"
  local vg_line="$2"
  local vg_name vg_free free_gb _size _rest

  # Split the line once with read instead of spawning awk per field.
  IFS=$' \t' read -r vg_name _size vg_free _rest <<< "$vg_line" || true

  # Extract the whole-gigabyte value:
  #  - drop a leading '<' (LVM marks rounded sizes this way; leaving it in
  #    made the value parse as 0, so the volume group was silently skipped)
  #  - drop the trailing unit letter
  #  - drop the fractional part (truncate, as before)
  free_gb="${vg_free#<}"
  free_gb="${free_gb%[gG]}"
  free_gb="${free_gb%%[.,]*}"

  # Anything that is not a plain non-negative integer is skipped.
  if [[ ! "$free_gb" =~ ^[0-9]+$ ]]; then
    return 0
  fi
  free_gb=$((10#$free_gb)) # force base 10 so leading zeros are harmless

  # Unchanged from the original: a value that truncates to 0 GB is skipped
  # rather than alerted on.
  if [ "$free_gb" -eq 0 ]; then
    return 0
  fi

  if [ "$free_gb" -le "$VG_FREE_CRITICAL" ]; then
    ALERTS+=("CRITICAL: $hostname:$vg_name volume group has only ${free_gb}GB free space")
    log_alert "CRITICAL: $hostname:$vg_name VG has only ${free_gb}GB free"
    return 2
  elif [ "$free_gb" -le "$VG_FREE_WARNING" ]; then
    ALERTS+=("WARNING: $hostname:$vg_name volume group has only ${free_gb}GB free space")
    log_warn "WARNING: $hostname:$vg_name VG has only ${free_gb}GB free"
    return 1
  fi

  return 0
}

#######################################
# Monitor a single node: check its storages and volume groups, then append
# the raw command output to the status log.
# Globals:   STATUS_LOG (appended to); ALERTS (appended by the check helpers)
# Arguments: $1 - node name (key of NODES)
# Outputs:   progress and warning lines on stdout
# Returns:   0 on success; 1 when the node is unreachable or returned no
#            storage status.
#######################################
monitor_node() {
  local hostname="$1"
  local storage_status vgs_info line
  local is_header=1

  if ! check_node "$hostname"; then
    log_warn "$hostname is not reachable"
    return 1
  fi

  log_info "Monitoring $hostname..."

  # Get storage status
  storage_status="$(ssh_node "$hostname" 'pvesm status 2>/dev/null' || echo "")"

  if [ -z "$storage_status" ]; then
    log_warn "Could not get storage status from $hostname"
    return 1
  fi

  # Process each storage line (skip header).
  # The loop reads from a here-string so it runs in THIS shell: piping into
  # `while` would run it in a subshell and every ALERTS+= would be lost.
  while IFS= read -r line; do
    if [ "$is_header" -eq 1 ]; then
      is_header=0
      continue
    fi
    [ -n "$line" ] || continue
    # The helper returns 1/2 for warning/critical; that is a finding, not a
    # failure, so it must not trigger `set -e`.
    check_storage_usage "$hostname" "$line" || true
  done <<< "$storage_status"

  # Check volume groups
  vgs_info="$(ssh_node "$hostname" 'vgs --units g --noheadings -o vg_name,vg_size,vg_free 2>/dev/null' || echo "")"

  if [ -n "$vgs_info" ]; then
    while IFS= read -r line; do
      [ -n "$line" ] || continue
      check_vg_free_space "$hostname" "$line" || true
    done <<< "$vgs_info"
  fi

  # Log storage status
  {
    echo "=== $hostname Storage Status $(date) ==="
    echo "$storage_status"
    echo ""
    echo "=== Volume Groups ==="
    echo "$vgs_info"
    echo ""
  } >> "$STATUS_LOG"

  return 0
}

#######################################
# Send alerts (can be extended to email, Slack, etc.).
# Currently appends the collected alerts to the alert log and prints them.
# Globals:   ALERTS (read); ALERT_LOG (appended to)
# Outputs:   a status line, then one line per alert, on stdout
# Returns:   0
#######################################
send_alerts() {
  local alert_count="${#ALERTS[@]}"

  # Nothing collected: report success and stop.
  if [ "$alert_count" -eq 0 ]; then
    log_success "No storage alerts"
    return 0
  fi

  log_warn "Found ${alert_count} storage alert(s)"

  # Append a dated section with one alert per line, closed by a blank line.
  {
    echo "=== Storage Alerts $(date) ==="
    printf '%s\n' "${ALERTS[@]}"
    echo ""
  } >> "$ALERT_LOG"

  # Print alerts
  printf '%s\n' "${ALERTS[@]}"

  # TODO: Add email/Slack/webhook notifications here
  # Example:
  # send_email "Storage Alerts" "$(printf '%s\n' "${ALERTS[@]}")"
  # send_slack_webhook "${ALERTS[@]}"
}

# Generate summary report
|
|
generate_summary() {
|
|
local summary_file="${LOG_DIR}/storage_summary_$(date +%Y%m%d).txt"
|
|
|
|
{
|
|
echo "=== Proxmox Storage Summary $(date) ==="
|
|
echo ""
|
|
echo "Nodes Monitored:"
|
|
for hostname in "${!NODES[@]}"; do
|
|
if check_node "$hostname"; then
|
|
echo " ✅ $hostname"
|
|
else
|
|
echo " ❌ $hostname (not reachable)"
|
|
fi
|
|
done
|
|
echo ""
|
|
echo "Alerts: ${#ALERTS[@]}"
|
|
if [ ${#ALERTS[@]} -gt 0 ]; then
|
|
echo ""
|
|
for alert in "${ALERTS[@]}"; do
|
|
echo " - $alert"
|
|
done
|
|
fi
|
|
echo ""
|
|
echo "Thresholds:"
|
|
echo " Storage Usage Warning: ${WARNING_THRESHOLD}%"
|
|
echo " Storage Usage Critical: ${CRITICAL_THRESHOLD}%"
|
|
echo " Volume Group Free Warning: ${VG_FREE_WARNING}GB"
|
|
echo " Volume Group Free Critical: ${VG_FREE_CRITICAL}GB"
|
|
} > "$summary_file"
|
|
|
|
log_info "Summary saved to: $summary_file"
|
|
}
|
|
|
|
#######################################
# Main monitoring function: dispatch on the requested mode.
# Globals:   NODES, LOG_DIR, ALERT_LOG (read)
# Arguments: $1 - mode, one of:
#              check  - run full monitoring check (default)
#              status - show current storage status of each reachable node
#              alerts - show the last 50 lines of today's alert log
# Outputs:   report text on stdout
# Returns:   exits 1 with a usage message for any other mode.
#######################################
main() {
  local mode="${1:-check}"
  local hostname status_output

  case "$mode" in
    check)
      echo "=== Proxmox Storage Monitoring ==="
      echo "Date: $(date)"
      echo ""

      # Monitor all nodes.
      # monitor_node returns 1 for an unreachable node; guard the call so
      # `set -e` does not abort the run before the remaining nodes, the
      # alerts and the summary are handled.
      for hostname in "${!NODES[@]}"; do
        monitor_node "$hostname" || true
      done

      # Send alerts
      send_alerts

      # Generate summary
      generate_summary

      echo ""
      log_info "Monitoring complete. Check logs in: $LOG_DIR"
      ;;
    status)
      # Show current status
      echo "=== Current Storage Status ==="
      for hostname in "${!NODES[@]}"; do
        if check_node "$hostname"; then
          echo ""
          echo "--- $hostname ---"
          # ssh_node reports an ssh failure as empty output (it substitutes
          # an empty line), so test the output rather than the exit status;
          # otherwise the fallback message could never be shown.
          status_output="$(ssh_node "$hostname" 'pvesm status 2>/dev/null' || true)"
          if [ -n "$status_output" ]; then
            printf '%s\n' "$status_output"
          else
            echo "Could not get status"
          fi
        fi
      done
      ;;
    alerts)
      # Show recent alerts
      if [ -f "$ALERT_LOG" ]; then
        tail -50 "$ALERT_LOG"
      else
        echo "No alerts found"
      fi
      ;;
    *)
      echo "Usage: $0 [check|status|alerts]"
      echo " check - Run full monitoring check (default)"
      echo " status - Show current storage status"
      echo " alerts - Show recent alerts"
      exit 1
      ;;
  esac
}

# Script entry point: hand every command-line argument to main unchanged.
main "$@"