#!/usr/bin/env bash
# Comprehensive Proxmox Storage Review and Recommendations
# Reviews all storage across all Proxmox nodes and provides recommendations.
#
# Usage: run with no arguments. A Markdown report is written to
#   <project-root>/reports/storage/storage_review_<timestamp>.md
#
# Optional environment (may also be set in config/ip-addresses.conf):
#   PROXMOX_HOST_ML110, PROXMOX_HOST_R630_01, PROXMOX_HOST_R630_02,
#   IP_SERVICE_13, IP_DEVICE_14          - node IP overrides
#   PROXMOX_PASS_ML110, PROXMOX_PASS_R630_01, PROXMOX_PASS_R630_02,
#   PROXMOX_PASS_R630_03, PROXMOX_PASS_R630_04
#                                        - root passwords (used via sshpass);
#                                          when unset, key-based SSH is used.
#
# NOTE(review): this file was recovered from a copy in which line breaks were
# collapsed and parts of the report here-documents were lost. Report text
# that could still be read is kept verbatim; spans marked "reconstructed"
# below were rebuilt and should be compared against the original wording.

set -euo pipefail

# Load IP configuration (optional; built-in defaults are used when absent).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# shellcheck disable=SC1091 — optional, project-local configuration file
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

REPORT_DIR="${PROJECT_ROOT}/reports/storage"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
REPORT_FILE="${REPORT_DIR}/storage_review_${TIMESTAMP}.md"

# Colors
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

log_info()    { printf '%b\n' "${BLUE}[INFO]${NC} $1"; }
log_success() { printf '%b\n' "${GREEN}[✓]${NC} $1"; }
log_warn()    { printf '%b\n' "${YELLOW}[⚠]${NC} $1"; }
log_error()   { printf '%b\n' "${RED}[✗]${NC} $1"; }
log_header()  { printf '%b\n' "${CYAN}=== $1 ===${NC}"; }

# Create report directory
mkdir -p "$REPORT_DIR"

# Proxmox nodes configuration. Each value is "<ip>:<password>"; the password
# part may be empty, in which case ssh_node uses key-based authentication.
# Passwords are taken from the environment and are never stored in this file.
declare -A NODES
NODES[ml110]="${PROXMOX_HOST_ML110:-192.168.11.10}:${PROXMOX_PASS_ML110:-}"
NODES[r630-01]="${PROXMOX_HOST_R630_01:-192.168.11.11}:${PROXMOX_PASS_R630_01:-}"
NODES[r630-02]="${PROXMOX_HOST_R630_02:-192.168.11.12}:${PROXMOX_PASS_R630_02:-}"
NODES[r630-03]="${IP_SERVICE_13:-192.168.11.13}:${PROXMOX_PASS_R630_03:-}"
NODES[r630-04]="${IP_DEVICE_14:-192.168.11.14}:${PROXMOX_PASS_R630_04:-}"

# Storage data collection, keyed "<hostname>_<field>".
declare -A STORAGE_DATA

#######################################
# Run a command on a node as root over SSH.
# Globals:   NODES (read)
# Arguments: $1 - node name (key of NODES); remaining args - remote command
# Outputs:   whatever the remote command prints
# Returns:   exit status of ssh/sshpass
#######################################
ssh_node() {
  local hostname="$1"
  shift
  local entry="${NODES[$hostname]}"
  local ip="${entry%%:*}"
  local password="${entry#*:}"
  # NOTE(review): StrictHostKeyChecking=no is kept from the original; it
  # disables host key verification and is worth revisiting.
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=5)

  if [[ -n "$password" ]] && command -v sshpass >/dev/null 2>&1; then
    # Hand the password over through the environment (-e) rather than -p so
    # it does not show up in the process list.
    SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  else
    ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  fi
}

#######################################
# Check node connectivity with a single ping (2 second timeout).
# Globals:   NODES (read)
# Arguments: $1 - node name
# Returns:   0 if the node answered, non-zero otherwise
#######################################
check_node() {
  local hostname="$1"
  local ip="${NODES[$hostname]%%:*}"
  ping -c 1 -W 2 "$ip" >/dev/null 2>&1
}

#######################################
# Collect storage information from a node into STORAGE_DATA.
# Each remote command is best effort: on failure its field is stored as an
# empty string (or "0" for counters) instead of aborting the run.
# Globals:   NODES (read), STORAGE_DATA (written)
# Arguments: $1 - node name
# Returns:   0 on success, 1 if the node does not answer ping
#######################################
collect_storage_info() {
  local hostname="$1"
  local ip="${NODES[$hostname]%%:*}"
  local storage_status vgs_info lvs_info disk_info
  local vm_count ct_count mem_info cpu_info

  log_info "Collecting storage information from $hostname ($ip)..."

  if ! check_node "$hostname"; then
    log_warn "$hostname is not reachable"
    return 1
  fi

  # Collect storage status
  storage_status=$(ssh_node "$hostname" 'pvesm status 2>/dev/null' || echo "")

  # Collect LVM information
  vgs_info=$(ssh_node "$hostname" 'vgs --units g --noheadings -o vg_name,vg_size,vg_free 2>/dev/null' || echo "")
  lvs_info=$(ssh_node "$hostname" 'lvs --units g --noheadings -o lv_name,vg_name,lv_size,data_percent,metadata_percent,pool_lv 2>/dev/null | grep -E "(thin|data)"' || echo "")

  # Collect disk information
  disk_info=$(ssh_node "$hostname" 'lsblk -d -o NAME,SIZE,TYPE,MOUNTPOINT 2>/dev/null' || echo "")

  # Collect VM/container count
  vm_count=$(ssh_node "$hostname" 'qm list 2>/dev/null | tail -n +2 | wc -l' || echo "0")
  ct_count=$(ssh_node "$hostname" 'pct list 2>/dev/null | tail -n +2 | wc -l' || echo "0")

  # Collect system resources
  mem_info=$(ssh_node "$hostname" 'free -h | grep Mem | awk "{print \$2,\$3,\$7}"' || echo "")
  cpu_info=$(ssh_node "$hostname" 'nproc' || echo "0")

  # Store data
  STORAGE_DATA["${hostname}_storage"]="$storage_status"
  STORAGE_DATA["${hostname}_vgs"]="$vgs_info"
  STORAGE_DATA["${hostname}_lvs"]="$lvs_info"
  STORAGE_DATA["${hostname}_disks"]="$disk_info"
  STORAGE_DATA["${hostname}_vms"]="$vm_count"
  STORAGE_DATA["${hostname}_cts"]="$ct_count"
  STORAGE_DATA["${hostname}_mem"]="$mem_info"
  STORAGE_DATA["${hostname}_cpu"]="$cpu_info"
  # Marker used by the summary so nodes are not pinged a second time.
  STORAGE_DATA["${hostname}_collected"]="1"

  log_success "Collected data from $hostname"
}

#######################################
# Test whether any row of `pvesm status` output reports usage of 80% or more.
# Only the last column is inspected and it is compared numerically, so a
# value such as "12.85%" is not mistaken for 85%.
# Arguments: $1 - `pvesm status` output
# Returns:   0 if a row is at or above 80%, 1 otherwise
#######################################
storage_has_high_usage() {
  awk '
    { pct = $NF }
    pct ~ /^[0-9.]+%$/ {
      sub(/%$/, "", pct)
      if (pct + 0 >= 80) hit = 1
    }
    END { exit hit ? 0 : 1 }
  ' <<<"$1"
}

#######################################
# Print one Markdown table row with a node's summed active storage.
# NOTE(review): assumes columns 4 and 5 of `pvesm status` are Total and Used
# in KiB — confirm against the PVE version in use. Storage shared between
# nodes is counted once per node.
# Arguments: $1 - node name; $2 - `pvesm status` output
# Outputs:   "| node | total GiB | used GiB | used % |"
#######################################
node_capacity_row() {
  awk -v node="$1" '
    $3 == "active" && $4 ~ /^[0-9]+$/ && $5 ~ /^[0-9]+$/ {
      total += $4
      used += $5
    }
    END {
      gib = 1024 * 1024
      pct = (total > 0) ? used * 100 / total : 0
      printf "| %s | %.1f | %.1f | %.1f%% |\n", node, total / gib, used / gib, pct
    }
  ' <<<"$2"
}

#######################################
# Generate the Markdown storage report from the collected data.
# Globals:   NODES, STORAGE_DATA, REPORT_FILE (read)
# Outputs:   writes REPORT_FILE; logs progress to stdout
#######################################
generate_report() {
  local hostname ip storage_status

  log_header "Generating Storage Review Report"

  # NOTE(review): reconstructed — the original report header text was lost.
  cat > "$REPORT_FILE" <<EOF
# Proxmox Storage Comprehensive Review

**Report Generated:** $(date)

---

## Storage Analysis and Recommendations

### 1. Node Storage Status

EOF

  # NOTE(review): reconstructed — per-node data section.
  for hostname in "${!NODES[@]}"; do
    ip="${NODES[$hostname]%%:*}"
    if [[ -z "${STORAGE_DATA["${hostname}_collected"]:-}" ]]; then
      cat >> "$REPORT_FILE" <<EOF
#### ${hostname} (${ip})

No data collected (node not reachable).

EOF
      continue
    fi

    cat >> "$REPORT_FILE" <<EOF
#### ${hostname} (${ip})

- **VMs:** ${STORAGE_DATA["${hostname}_vms"]:-0}
- **Containers:** ${STORAGE_DATA["${hostname}_cts"]:-0}
- **CPU Cores:** ${STORAGE_DATA["${hostname}_cpu"]:-0}
- **Memory (total used available):** ${STORAGE_DATA["${hostname}_mem"]:-}

**Storage Status:**
\`\`\`
${STORAGE_DATA["${hostname}_storage"]:-}
\`\`\`

**Volume Groups:**
\`\`\`
${STORAGE_DATA["${hostname}_vgs"]:-}
\`\`\`

**Thin Pools / Data Volumes:**
\`\`\`
${STORAGE_DATA["${hostname}_lvs"]:-}
\`\`\`

**Disks:**
\`\`\`
${STORAGE_DATA["${hostname}_disks"]:-}
\`\`\`

EOF
  done

  # NOTE(review): reconstructed — section heading.
  cat >> "$REPORT_FILE" <<EOF
### 2. Storage Issues by Node

EOF

  for hostname in "${!NODES[@]}"; do
    storage_status="${STORAGE_DATA["${hostname}_storage"]:-}"
    # Without data every check below would be meaningless (an unreachable
    # node would otherwise be reported as "missing thin storage").
    [[ -n "$storage_status" ]] || continue

    # NOTE(review): reconstructed — per-node heading.
    cat >> "$REPORT_FILE" <<EOF
#### ${hostname}

EOF

    # Check for disabled storage
    # NOTE(review): reconstructed — the original match pattern and most of
    # this text were lost; only the trailing "pvesm set … --disable 0"
    # command survived.
    if grep -qE "disabled|inactive" <<<"$storage_status"; then
      cat >> "$REPORT_FILE" <<EOF
**Issue:** Disabled or inactive storage detected

**Action Required:**
\`\`\`bash
pvesm status
pvesm set <storage> --disable 0
\`\`\`

EOF
    fi

    # Check for high usage
    if storage_has_high_usage "$storage_status"; then
      # NOTE(review): the "Issue" line is reconstructed; the rest is original.
      cat >> "$REPORT_FILE" <<EOF
**Issue:** High storage usage detected (>80%)

**Recommendation:**
- Monitor storage usage closely
- Plan for expansion or cleanup
- Consider migrating VMs to other nodes

EOF
    fi

    # Check for missing LVM thin storage
    if ! grep -qE "lvmthin|thin" <<<"$storage_status"; then
      # NOTE(review): reconstructed — original text lost.
      cat >> "$REPORT_FILE" <<EOF
**Issue:** No LVM thin storage configured

**Recommendation:**
- Configure LVM thin storage for VM and container disks
- Verify storage node restrictions in storage.cfg

EOF
    fi
  done

  # NOTE(review): the text up to ">80% used" is reconstructed; everything
  # from "3. **Storage Distribution**" onwards is original.
  cat >> "$REPORT_FILE" <<EOF
### 3. Storage Optimization Recommendations

1. **Storage Type Selection**
   - Use LVM thin storage for VM and container disks
   - Use directory storage for ISO images and templates

2. **Thin Pool Management**
   - Monitor thin pool data and metadata usage
   - Expand thin pools before they are >80% used

3. **Storage Distribution**
   - Distribute VMs across multiple nodes
   - Balance storage usage across nodes
   - Avoid overloading single node

4. **Backup Storage Strategy**
   - Use separate storage for backups
   - Consider NFS or Ceph for shared backups
   - Implement backup rotation policies

### 4. Capacity Planning

#### Current Storage Distribution

EOF

  # Per-node capacity totals (replaces a loop that parsed nothing).
  {
    printf '| Node | Total (GiB) | Used (GiB) | Used %% |\n'
    printf '|------|-------------|------------|--------|\n'
    for hostname in "${!NODES[@]}"; do
      storage_status="${STORAGE_DATA["${hostname}_storage"]:-}"
      if [[ -n "$storage_status" ]]; then
        node_capacity_row "$hostname" "$storage_status"
      fi
    done
    printf '\n'
  } >> "$REPORT_FILE"

  # NOTE(review): sections 5 and 6 and the first item of section 7 up to
  # "(>80% alert)" are reconstructed; the remainder is original text.
  cat >> "$REPORT_FILE" <<EOF
### 5. Performance Optimization

- Place I/O intensive VMs on the fastest available storage
- Avoid overcommitting thin pools

### 6. Security Recommendations

- Restrict storage access to the nodes that need it
- Keep backups on storage separate from production data

### 7. Monitoring Recommendations

1. **Storage Usage Monitoring**
   - Set up storage usage alerts (>80% alert)
   - Monitor thin pool metadata usage
   - Track storage growth trends

2. **Performance Monitoring**
   - Monitor I/O latency
   - Track storage throughput
   - Identify bottlenecks

3. **Automated Alerts**
   - Storage usage >80%
   - Thin pool metadata >80%
   - Storage errors or failures

### 8. Migration Recommendations

#### Workload Distribution

**Current State:**
- ml110: Hosting all VMs (overloaded)
- r630-01/r630-02: Underutilized

**Recommended Distribution:**
- **ml110:** Keep management/lightweight VMs (10-15 VMs)
- **r630-01:** Migrate medium workload VMs (10-15 VMs)
- **r630-02:** Migrate heavy workload VMs (10-15 VMs)

**Benefits:**
- Better performance (ml110 CPU is slower)
- Better resource utilization
- Improved redundancy
- Better storage distribution

### 9. Immediate Action Items

#### Critical (Do First)
1. ✅ Review storage status on all nodes
2. ⚠️ Enable disabled storage pools
3. ⚠️ Verify storage node restrictions in storage.cfg
4. ⚠️ Check for storage errors or warnings

#### High Priority
1. ⚠️ Configure LVM thin storage where missing
2. ⚠️ Set up storage monitoring and alerts
3. ⚠️ Plan VM migration for better distribution
4. ⚠️ Review and optimize storage.cfg

#### Recommended
1. ⚠️ Implement backup storage strategy
2. ⚠️ Consider shared storage (NFS/Ceph) for HA
3. ⚠️ Optimize storage performance settings
4. ⚠️ Document storage procedures

---

## Detailed Storage Commands Reference

### Check Storage Status
\`\`\`bash
# On any Proxmox node
pvesm status
pvesm list <storage>
\`\`\`

### Enable Disabled Storage
\`\`\`bash
pvesm set <storage> --disable 0
\`\`\`

### Check LVM Configuration
\`\`\`bash
vgs                                    # List volume groups
lvs                                    # List logical volumes
lvs -o +data_percent,metadata_percent  # Check thin pool usage
\`\`\`

### Check Disk Usage
\`\`\`bash
df -h    # Filesystem usage
lsblk    # Block devices
\`\`\`

### Storage Performance Testing
\`\`\`bash
# Test storage I/O
fio --name=test --ioengine=libaio --iodepth=16 --rw=randwrite --bs=4k --size=1G --runtime=60
\`\`\`

---

## Conclusion

This comprehensive storage review provides:
- ✅ Current storage status across all nodes
- ✅ Detailed analysis of storage configurations
- ✅ Performance optimization recommendations
- ✅ Capacity planning guidance
- ✅ Security and monitoring recommendations
- ✅ Migration and distribution strategies

**Next Steps:**
1. Review this report
2. Address critical issues first
3. Implement high-priority recommendations
4. Plan for long-term optimizations

---

**Report Generated:** $(date)
**Report File:** $REPORT_FILE
EOF

  log_success "Report generated: $REPORT_FILE"
}

#######################################
# Main execution: collect data from every node, write the report, and print
# a short per-node summary.
# Globals:   NODES, STORAGE_DATA, REPORT_FILE (read)
#######################################
main() {
  local hostname vms cts

  log_header "Proxmox Storage Comprehensive Review"
  echo ""

  # Collect data from all nodes
  for hostname in "${!NODES[@]}"; do
    collect_storage_info "$hostname" || log_warn "Failed to collect data from $hostname"
    echo ""
  done

  # Generate report
  generate_report

  # Display summary
  log_header "Review Summary"
  echo ""
  log_info "Report saved to: $REPORT_FILE"
  echo ""
  log_info "Quick Summary:"

  for hostname in "${!NODES[@]}"; do
    # Reuse the collection result instead of pinging every node again.
    if [[ -n "${STORAGE_DATA["${hostname}_collected"]:-}" ]]; then
      vms="${STORAGE_DATA["${hostname}_vms"]:-0}"
      cts="${STORAGE_DATA["${hostname}_cts"]:-0}"
      echo "  $hostname: $vms VMs, $cts Containers"
    else
      echo "  $hostname: Not reachable"
    fi
  done

  echo ""
  log_success "Storage review complete!"
  log_info "View full report: cat $REPORT_FILE"
}

# Run main function
main "$@"