Files
proxmox/scripts/review-all-storage.sh.bak
defiQUG fbda1b4beb
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
docs: Ledger Live integration, contract deploy learnings, NEXT_STEPS updates
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands
- CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround
- CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check
- NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere
- MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates
- LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-12 15:46:57 -08:00

502 lines
13 KiB
Bash
Executable File

#!/usr/bin/env bash
# Proxmox storage review: gathers storage facts from every configured node
# and writes a Markdown report with recommendations.

# Strict mode: stop on unhandled errors, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute directory containing this script, and the project root one level up.
SCRIPT_DIR="$(dirname "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "${SCRIPT_DIR}" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"

# Reports are written under <project>/reports/storage, one timestamped file per run.
REPORT_DIR="${PROJECT_ROOT}/reports/storage"
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
REPORT_FILE="${REPORT_DIR}/storage_review_${TIMESTAMP}.md"
# ANSI colour escape sequences used by the log helpers below. They are stored
# with a literal backslash and expanded by printf's %b at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Log helpers. Each prints one line to stdout: a coloured tag followed by the
# message given as $1 (an omitted message prints as empty).
#
# Only the colour variables go through %b (escape expansion); the message goes
# through %s so backslashes in caller data (paths, command output) are printed
# verbatim instead of being interpreted as escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1:-}"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "${1:-}"; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "${1:-}"; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "${1:-}"; }
# Section banner: "=== <title> ===" wrapped in cyan.
log_header() { printf '%b=== %s ===%b\n' "$CYAN" "${1:-}" "$NC"; }
# Make sure the report output directory exists before anything writes to it.
[[ -d "${REPORT_DIR}" ]] || mkdir -p "${REPORT_DIR}"
# Proxmox nodes configuration.
#
# NODES maps a node name to "<ip>:<root password>". The IP is everything before
# the first colon and the password is everything after it, so a password may
# itself contain colons.
#
# SECURITY: the literal passwords below are committed to version control and
# must be treated as compromised. They are kept only as backward-compatible
# defaults; supply the real (rotated) secret through the matching
# PROXMOX_PASS_* environment variable, or better, switch to SSH key auth.
# TODO(review): remove the literal defaults once all callers export the variables.
declare -A NODES=(
  [ml110]="192.168.11.10:${PROXMOX_PASS_ML110:-L@kers2010}"
  [r630-01]="192.168.11.11:${PROXMOX_PASS_R630_01:-password}"
  [r630-02]="192.168.11.12:${PROXMOX_PASS_R630_02:-password}"
  [r630-03]="192.168.11.13:${PROXMOX_PASS_R630_03:-L@kers2010}"
  [r630-04]="192.168.11.14:${PROXMOX_PASS_R630_04:-L@kers2010}"
)

# Storage data collection: filled with keys of the form "<node>_<field>".
declare -A STORAGE_DATA=()
#######################################
# Run a command on a Proxmox node over SSH as root.
# Globals:
#   NODES (read) - associative array of node name -> "<ip>[:<password>]"
# Arguments:
#   $1   - node name (key in NODES)
#   $2.. - command and arguments passed through to ssh
# Outputs:
#   Whatever the remote command writes; an error on stderr for an unknown node.
# Returns:
#   1 if the node is not in NODES, otherwise the exit status of sshpass/ssh.
#######################################
ssh_node() {
  local hostname="$1"
  shift

  # Look the node up defensively: under `set -u` a bare ${NODES[missing]}
  # would abort the whole script instead of failing this one call.
  local entry="${NODES[$hostname]:-}"
  if [[ -z "$entry" ]]; then
    printf 'ssh_node: unknown node: %s\n' "$hostname" >&2
    return 1
  fi

  local ip="${entry%%:*}"
  # Only treat the part after the first colon as a password when a colon is
  # present; otherwise "${entry#*:}" would hand back the IP itself.
  local password=""
  if [[ "$entry" == *:* ]]; then
    password="${entry#*:}"
  fi

  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=5)

  if [[ -n "$password" ]] && command -v sshpass >/dev/null 2>&1; then
    # Pass the password via the SSHPASS environment variable (sshpass -e)
    # rather than -p, so it does not appear in the process argument list.
    SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  else
    # No password configured or sshpass unavailable: plain ssh (keys/agent/prompt).
    ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  fi
}
#######################################
# Probe whether a node answers a single ping.
# Globals:
#   NODES (read) - the IP is the part of the entry before the first colon
# Arguments:
#   $1 - node name (key in NODES)
# Returns:
#   0 if the ping succeeds, 1 for any ping failure.
#######################################
check_node() {
  local node_name="$1"
  local addr="${NODES[$node_name]%%:*}"

  # One echo request (-c 1) with -W 2; ping's own output is discarded and any
  # non-zero ping status is normalised to 1.
  ping -c 1 -W 2 "$addr" >/dev/null 2>&1 || return 1
  return 0
}
#######################################
# Gather storage and resource facts from one node into STORAGE_DATA.
# Globals:
#   NODES (read), STORAGE_DATA (written: "<node>_storage", "_vgs", "_lvs",
#   "_disks", "_vms", "_cts", "_mem", "_cpu")
# Arguments:
#   $1 - node name (key in NODES)
# Returns:
#   1 if the node does not answer check_node, 0 otherwise. A failing remote
#   probe is not an error: its fallback text is appended to whatever the
#   probe printed.
#######################################
collect_storage_info() {
  local hostname="$1"
  local node_ip="${NODES[$hostname]%%:*}"

  log_info "Collecting storage information from $hostname ($node_ip)..."

  check_node "$hostname" || {
    log_warn "$hostname is not reachable"
    return 1
  }

  # Probe table, three entries per row:
  #   STORAGE_DATA key suffix | fallback echoed when the probe fails | remote command
  local -a probes=(
    storage ''  'pvesm status 2>/dev/null'
    vgs     ''  'vgs --units g --noheadings -o vg_name,vg_size,vg_free 2>/dev/null'
    lvs     ''  'lvs --units g --noheadings -o lv_name,vg_name,lv_size,data_percent,metadata_percent,pool_lv 2>/dev/null | grep -E "(thin|data)"'
    disks   ''  'lsblk -d -o NAME,SIZE,TYPE,MOUNTPOINT 2>/dev/null'
    vms     '0' 'qm list 2>/dev/null | tail -n +2 | wc -l'
    cts     '0' 'pct list 2>/dev/null | tail -n +2 | wc -l'
    mem     ''  'free -h | grep Mem | awk "{print \$2,\$3,\$7}"'
    cpu     '0' 'nproc'
  )

  local idx key fallback remote_cmd
  for ((idx = 0; idx < ${#probes[@]}; idx += 3)); do
    key="${probes[idx]}"
    fallback="${probes[idx + 1]}"
    remote_cmd="${probes[idx + 2]}"
    # The `|| echo` sits inside the substitution so a failed probe never
    # aborts the script under `set -e`.
    STORAGE_DATA["${hostname}_${key}"]=$(ssh_node "$hostname" "$remote_cmd" || echo "$fallback")
  done

  log_success "Collected data from $hostname"
}
# Succeeds (status 0) when any row of `pvesm status` text ($1) reports a usage
# percentage at or above the threshold ($2, default 80); status 1 otherwise.
# Only a last column shaped like "<number>%" is considered, so the header row
# and non-numeric values are ignored.
_storage_usage_high() {
  local status_text="$1"
  local threshold="${2:-80}"
  LC_ALL=C awk -v limit="$threshold" '
    $NF ~ /^[0-9]+(\.[0-9]+)?%$/ {
      pct = $NF
      sub(/%$/, "", pct)
      if (pct + 0 >= limit + 0) { found = 1 }
    }
    END { exit(found ? 0 : 1) }
  ' <<<"$status_text"
}

# Prints one Markdown table row "| node | total GiB | used GiB | used % |" for
# node $1, summed over the rows of `pvesm status` text ($2) whose third column
# is "active". Prints nothing when no such row has a non-zero total.
# NOTE(review): assumes columns 4 and 5 are Total and Used in KiB — confirm
# against the pvesm version in use. Storage shared between nodes is counted
# once per node.
_storage_capacity_row() {
  local node="$1"
  local status_text="$2"
  LC_ALL=C awk -v node="$node" '
    $3 == "active" && $4 ~ /^[0-9]+$/ && $5 ~ /^[0-9]+$/ {
      total += $4
      used += $5
    }
    END {
      if (total > 0) {
        printf "| %s | %.1f | %.1f | %.1f%% |\n", node, total / 1048576, used / 1048576, used * 100 / total
      }
    }
  ' <<<"$status_text"
}

#######################################
# Write the Markdown storage review to $REPORT_FILE.
# Globals:
#   REPORT_FILE (read; file is overwritten), NODES (read), STORAGE_DATA (read)
# Arguments:
#   None
# Outputs:
#   Progress messages via log_header/log_success; the report itself goes to
#   $REPORT_FILE.
# Notes:
#   The here-documents use an unquoted delimiter, so $(...) and ${...} inside
#   them are expanded and literal backticks must stay escaped.
#######################################
generate_report() {
  local hostname ip storage_status

  log_header "Generating Storage Review Report"

  # Preamble: this first write truncates the file, every later one appends.
  cat > "$REPORT_FILE" <<EOF
# Proxmox Storage Comprehensive Review
**Date:** $(date)
**Report Generated:** $(date -u +"%Y-%m-%d %H:%M:%S UTC")
**Review Scope:** All Proxmox nodes and storage configurations
---
## Executive Summary
This report provides a comprehensive review of all storage configurations across all Proxmox nodes, including:
- Current storage status and usage
- Storage type analysis
- Performance recommendations
- Capacity planning
- Optimization suggestions
---
## Node Overview
EOF

  # Process each node: one section with whatever was collected for it.
  # Reachability is probed again here via check_node rather than cached.
  for hostname in "${!NODES[@]}"; do
    ip="${NODES[$hostname]%%:*}"
    cat >> "$REPORT_FILE" <<EOF
### $hostname ($ip)
**Status:** $(if check_node "$hostname"; then echo "✅ Reachable"; else echo "❌ Not Reachable"; fi)
**System Resources:**
- CPU Cores: ${STORAGE_DATA["${hostname}_cpu"]:-Unknown}
- Memory: ${STORAGE_DATA["${hostname}_mem"]:-Unknown}
- VMs: ${STORAGE_DATA["${hostname}_vms"]:-0}
- Containers: ${STORAGE_DATA["${hostname}_cts"]:-0}
**Storage Status:**
\`\`\`
${STORAGE_DATA["${hostname}_storage"]:-No storage data available}
\`\`\`
**Volume Groups:**
\`\`\`
${STORAGE_DATA["${hostname}_vgs"]:-No volume groups found}
\`\`\`
**Thin Pools:**
\`\`\`
${STORAGE_DATA["${hostname}_lvs"]:-No thin pools found}
\`\`\`
**Physical Disks:**
\`\`\`
${STORAGE_DATA["${hostname}_disks"]:-No disk information available}
\`\`\`
---
EOF
  done

  # Add recommendations section
  cat >> "$REPORT_FILE" <<EOF
## Storage Analysis and Recommendations
### 1. Storage Type Analysis
#### Local Storage (Directory-based)
- **Purpose:** ISO images, container templates, backups
- **Performance:** Good for read-heavy workloads
- **Recommendation:** Use for templates and ISOs, not for VM disks
#### LVM Thin Storage
- **Purpose:** VM/container disk images
- **Performance:** Excellent with thin provisioning
- **Benefits:** Space efficiency, snapshots, cloning
- **Recommendation:** ✅ **Preferred for VM/container disks**
#### ZFS Storage
- **Purpose:** High-performance VM storage
- **Performance:** Excellent with compression and deduplication
- **Benefits:** Data integrity, snapshots, clones
- **Recommendation:** Consider for high-performance workloads
### 2. Critical Issues and Fixes
EOF

  # Analyze each node and add recommendations. Nodes without collected
  # storage status are skipped entirely.
  for hostname in "${!NODES[@]}"; do
    storage_status="${STORAGE_DATA["${hostname}_storage"]:-}"
    if [[ -z "$storage_status" ]]; then
      continue
    fi

    cat >> "$REPORT_FILE" <<EOF
#### $hostname Storage Issues
EOF

    # Check for disabled storage. Here-strings are used instead of
    # `echo | grep -q` so an early grep exit cannot fail the pipeline under
    # `pipefail`.
    if grep -qE 'disabled|inactive' <<<"$storage_status"; then
      cat >> "$REPORT_FILE" <<EOF
⚠️ **Issue:** Some storage pools are disabled or inactive
**Action Required:**
\`\`\`bash
ssh root@${NODES[$hostname]%%:*}
pvesm status
# Enable disabled storage:
pvesm set <storage-name> --disable 0
\`\`\`
EOF
    fi

    # Check for high usage: compare the percentage column numerically. A text
    # pattern such as "[0-9]{2,}%" also matches values like "16.11%".
    if _storage_usage_high "$storage_status" 80; then
      cat >> "$REPORT_FILE" <<EOF
⚠️ **Issue:** Storage usage is high (>80%)
**Recommendation:**
- Monitor storage usage closely
- Plan for expansion or cleanup
- Consider migrating VMs to other nodes
EOF
    fi

    # Check for missing LVM thin storage
    if ! grep -qE 'lvmthin|thin' <<<"$storage_status"; then
      cat >> "$REPORT_FILE" <<EOF
⚠️ **Issue:** No LVM thin storage configured
**Recommendation:**
- Configure LVM thin storage for better performance
- Use thin provisioning for space efficiency
- Enable snapshots and cloning capabilities
EOF
    fi
  done

  # Add general recommendations
  cat >> "$REPORT_FILE" <<EOF
### 3. Performance Optimization Recommendations
#### Storage Performance Best Practices
1. **Use LVM Thin for VM Disks**
- Better performance than directory storage
- Thin provisioning saves space
- Enables snapshots and cloning
2. **Monitor Thin Pool Metadata Usage**
- Thin pools require metadata space
- Monitor metadata_percent in lvs output
- Expand metadata if >80% used
3. **Storage Distribution**
- Distribute VMs across multiple nodes
- Balance storage usage across nodes
- Avoid overloading single node
4. **Backup Storage Strategy**
- Use separate storage for backups
- Consider NFS or Ceph for shared backups
- Implement backup rotation policies
### 4. Capacity Planning
#### Current Storage Distribution
EOF

  # Per-node capacity totals derived from the collected `pvesm status` text.
  local capacity_rows="" row
  for hostname in "${!NODES[@]}"; do
    storage_status="${STORAGE_DATA["${hostname}_storage"]:-}"
    if [[ -z "$storage_status" ]]; then
      continue
    fi
    # A parsing failure only costs this node its row, never the whole report.
    row="$(_storage_capacity_row "$hostname" "$storage_status")" || row=""
    if [[ -n "$row" ]]; then
      capacity_rows+="${row}"$'\n'
    fi
  done
  if [[ -n "$capacity_rows" ]]; then
    {
      printf '%s\n' '| Node | Total (GiB) | Used (GiB) | Used % |'
      printf '%s\n' '|------|-------------|------------|--------|'
      printf '%s' "$capacity_rows"
    } >> "$REPORT_FILE"
  else
    printf '%s\n' '_No storage capacity data was collected._' >> "$REPORT_FILE"
  fi

  cat >> "$REPORT_FILE" <<EOF
**Recommendations:**
- Monitor storage growth trends
- Plan for 20-30% headroom
- Set alerts at 80% usage
- Consider storage expansion before reaching capacity
### 5. Storage Type Recommendations by Use Case
| Use Case | Recommended Storage Type | Reason |
|----------|-------------------------|--------|
| VM/Container Disks | LVM Thin (lvmthin) | Best performance, thin provisioning |
| ISO Images | Directory (dir) | Read-only, no performance impact |
| Container Templates | Directory (dir) | Templates are read-only |
| Backups | Directory or NFS | Separate from production storage |
| High-Performance VMs | ZFS or LVM Thin | Best I/O performance |
| Development/Test | LVM Thin | Space efficient with cloning |
### 6. Security Recommendations
1. **Storage Access Control**
- Review storage.cfg node restrictions
- Ensure proper node assignments
- Verify storage permissions
2. **Backup Security**
- Encrypt backups if containing sensitive data
- Store backups off-site
- Test backup restoration regularly
### 7. Monitoring Recommendations
1. **Set Up Storage Monitoring**
- Monitor storage usage (>80% alert)
- Monitor thin pool metadata usage
- Track storage growth trends
2. **Performance Monitoring**
- Monitor I/O latency
- Track storage throughput
- Identify bottlenecks
3. **Automated Alerts**
- Storage usage >80%
- Thin pool metadata >80%
- Storage errors or failures
### 8. Migration Recommendations
#### Workload Distribution
**Current State:**
- ml110: Hosting all VMs (overloaded)
- r630-01/r630-02: Underutilized
**Recommended Distribution:**
- **ml110:** Keep management/lightweight VMs (10-15 VMs)
- **r630-01:** Migrate medium workload VMs (10-15 VMs)
- **r630-02:** Migrate heavy workload VMs (10-15 VMs)
**Benefits:**
- Better performance (ml110 CPU is slower)
- Better resource utilization
- Improved redundancy
- Better storage distribution
### 9. Immediate Action Items
#### Critical (Do First)
1. ✅ Review storage status on all nodes
2. ⚠️ Enable disabled storage pools
3. ⚠️ Verify storage node restrictions in storage.cfg
4. ⚠️ Check for storage errors or warnings
#### High Priority
1. ⚠️ Configure LVM thin storage where missing
2. ⚠️ Set up storage monitoring and alerts
3. ⚠️ Plan VM migration for better distribution
4. ⚠️ Review and optimize storage.cfg
#### Recommended
1. ⚠️ Implement backup storage strategy
2. ⚠️ Consider shared storage (NFS/Ceph) for HA
3. ⚠️ Optimize storage performance settings
4. ⚠️ Document storage procedures
---
## Detailed Storage Commands Reference
### Check Storage Status
\`\`\`bash
# On any Proxmox node
pvesm status
pvesm list <storage-name>
\`\`\`
### Enable Disabled Storage
\`\`\`bash
pvesm set <storage-name> --disable 0
\`\`\`
### Check LVM Configuration
\`\`\`bash
vgs # List volume groups
lvs # List logical volumes
lvs -o +data_percent,metadata_percent # Check thin pool usage
\`\`\`
### Check Disk Usage
\`\`\`bash
df -h # Filesystem usage
lsblk # Block devices
\`\`\`
### Storage Performance Testing
\`\`\`bash
# Test storage I/O
fio --name=test --ioengine=libaio --iodepth=16 --rw=randwrite --bs=4k --size=1G --runtime=60
\`\`\`
---
## Conclusion
This comprehensive storage review provides:
- ✅ Current storage status across all nodes
- ✅ Detailed analysis of storage configurations
- ✅ Performance optimization recommendations
- ✅ Capacity planning guidance
- ✅ Security and monitoring recommendations
- ✅ Migration and distribution strategies
**Next Steps:**
1. Review this report
2. Address critical issues first
3. Implement high-priority recommendations
4. Plan for long-term optimizations
---
**Report Generated:** $(date)
**Report File:** $REPORT_FILE
EOF

  log_success "Report generated: $REPORT_FILE"
}
#######################################
# Entry point: collect data from every node, write the report, then print a
# short per-node summary.
# Globals:
#   NODES (read), STORAGE_DATA (read), REPORT_FILE (read)
# Arguments:
#   None used.
# Outputs:
#   Progress and summary lines on stdout.
#######################################
main() {
  local node vm_total ct_total

  log_header "Proxmox Storage Comprehensive Review"
  printf '\n'

  # Collection phase: a node that fails is reported and skipped, not fatal.
  for node in "${!NODES[@]}"; do
    if ! collect_storage_info "$node"; then
      log_warn "Failed to collect data from $node"
    fi
    printf '\n'
  done

  generate_report

  # Summary phase.
  log_header "Review Summary"
  printf '\n'
  log_info "Report saved to: $REPORT_FILE"
  printf '\n'
  log_info "Quick Summary:"
  for node in "${!NODES[@]}"; do
    # Reachability is re-probed here; counts default to 0 when nothing was stored.
    if ! check_node "$node"; then
      printf ' %s: Not reachable\n' "$node"
      continue
    fi
    vm_total="${STORAGE_DATA["${node}_vms"]:-0}"
    ct_total="${STORAGE_DATA["${node}_cts"]:-0}"
    printf ' %s: %s VMs, %s Containers\n' "$node" "$vm_total" "$ct_total"
  done
  printf '\n'

  log_success "Storage review complete!"
  log_info "View full report: cat $REPORT_FILE"
}
# Script entry point: run main, forwarding the command-line arguments unchanged.
main "$@"