#!/usr/bin/env bash
#
# Execute Immediate Actions from Hardware/Storage Investigation
#
# Runs four read-only checks against the Proxmox nodes and logs the results:
#   1. Investigate the thin2 (r630-02) capacity issue
#   2. Check inactive storage pools and whether they can be activated
#   3. Identify CPU-intensive workloads on ml110
#   4. Check migration readiness of the target nodes
#
# Nothing is migrated or modified on the nodes; the output is meant to be
# reviewed by an operator.

# Strict mode: abort on unhandled errors, unset variables and failed pipelines.
set -euo pipefail

# Locations are derived from where this script lives, so it can be started
# from any working directory. `cd` output is discarded because a set CDPATH
# makes `cd` print the directory, which would corrupt the captured value.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
PROJECT_ROOT="$(cd -- "$SCRIPT_DIR/.." >/dev/null && pwd)"
REPORT_DIR="${PROJECT_ROOT}/reports/status"
# One log file per run, named after the start time.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
ACTION_LOG="${REPORT_DIR}/immediate_actions_${TIMESTAMP}.log"

# Colors: ANSI escape sequences, stored unexpanded (the backslash escapes are
# interpreted by the logging helpers when they print).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m' # reset ("no color")

# Logging helpers. Each prints one decorated message to stdout and appends the
# same bytes to $ACTION_LOG.
#   Globals:   ACTION_LOG and the color variables (read)
#   Arguments: $1 - message text (optional; treated as empty when omitted)
# Only the color codes go through %b; the message is printed with %s so that
# backslashes in it (paths, regexes, remote output) are not interpreted.
log_info() { printf '%b %s\n' "${BLUE}[INFO]${NC}" "${1-}" | tee -a "$ACTION_LOG"; }
log_success() { printf '%b %s\n' "${GREEN}[✓]${NC}" "${1-}" | tee -a "$ACTION_LOG"; }
log_warn() { printf '%b %s\n' "${YELLOW}[⚠]${NC}" "${1-}" | tee -a "$ACTION_LOG"; }
log_error() { printf '%b %s\n' "${RED}[✗]${NC}" "${1-}" | tee -a "$ACTION_LOG"; }
log_header() { printf '%b%s%b\n' "${CYAN}=== " "${1-}" " ===${NC}" | tee -a "$ACTION_LOG"; }
# Section titles are surrounded by a blank line above and below.
log_section() { printf '\n%b%s%b\n\n' "${MAGENTA}>>> " "${1-}" " <<<${NC}" | tee -a "$ACTION_LOG"; }

# Create report directory (the log helpers append to a file inside it).
mkdir -p -- "$REPORT_DIR"

# Proxmox nodes configuration.
# Each entry is "<ip>:<root password>"; everything after the first colon is
# the password.
# NOTE(review): plaintext root passwords are committed here. Each one can be
# overridden through the environment (PVE_PASSWORD_ML110, PVE_PASSWORD_R630_01,
# PVE_PASSWORD_R630_02); the defaults are kept only so existing invocations
# keep working and should be rotated and removed from source control.
declare -A NODES
NODES[ml110]="192.168.11.10:${PVE_PASSWORD_ML110:-L@kers2010}"
NODES[r630-01]="192.168.11.11:${PVE_PASSWORD_R630_01:-password}"
NODES[r630-02]="192.168.11.12:${PVE_PASSWORD_R630_02:-password}"

#######################################
# SSH helper function: run a command as root on a configured node.
# Globals:   NODES (read) - entries of the form "<ip>:<password>"
# Arguments: $1  - node name (key of NODES)
#            $2+ - command/arguments passed to ssh; stdin is forwarded, so a
#                  script can be fed in with a here-document
# Outputs:   whatever the remote command prints; diagnostics go to stderr so
#            callers capturing stdout are not polluted
# Returns:   2 for a missing/unknown node name, otherwise the ssh (or sshpass)
#            exit status
#######################################
ssh_node() {
  local hostname="${1:-}"
  if [[ -z "$hostname" ]]; then
    printf 'ssh_node: missing node name\n' >&2
    return 2
  fi

  # ":-" keeps an unknown key from aborting the whole script under `set -u`.
  local entry="${NODES[$hostname]:-}"
  if [[ -z "$entry" ]]; then
    printf 'ssh_node: unknown node "%s"\n' "$hostname" >&2
    return 2
  fi
  shift

  local ip="${entry%%:*}"
  local password=""
  if [[ "$entry" == *:* ]]; then
    password="${entry#*:}"
  fi

  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=10)

  if [[ -n "$password" ]] && command -v sshpass >/dev/null 2>&1; then
    # Pass the password through the environment (sshpass -e reads $SSHPASS)
    # instead of "-p <password>", which is visible in the process list.
    SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  else
    # No sshpass (or no password configured): rely on key-based auth.
    ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  fi
}

#######################################
# Check node connectivity with a single ICMP echo (2 second timeout).
# Globals:   NODES (read)
# Arguments: $1 - node name (key of NODES)
# Returns:   0 if the node answered the ping; 1 if it did not, or if the
#            node name is missing or not configured
#######################################
check_node() {
  local hostname="${1:-}"
  [[ -n "$hostname" ]] || return 1

  # ":-" so an unknown node is reported as unreachable instead of tripping
  # `set -u` and killing the script.
  local entry="${NODES[$hostname]:-}"
  [[ -n "$entry" ]] || return 1

  if ping -c 1 -W 2 "${entry%%:*}" >/dev/null 2>&1; then
    return 0
  fi
  return 1
}

#######################################
# Action 1: Investigate thin2 (r630-02) capacity issue.
# Logs the thin2 storage status, the guests that have a disk on thin2, the
# other thin pools on the node and the snapshots of the affected guests.
# Read-only: nothing is changed on the node.
# Globals:   ACTION_LOG (appended to)
# Returns:   1 if r630-02 is not reachable, 0 otherwise
#
# Guests are matched on the volume prefix of their disk lines, e.g.
#   scsi0: thin2:vm-100-disk-0,size=32G
#   rootfs: thin2:subvol-101-disk-0,size=8G
# and every disk of a guest is considered, not only the first one.
#######################################
investigate_thin2_capacity() {
  log_section "Action 1: Investigating thin2 (r630-02) Capacity Issue"

  local hostname="r630-02"
  local storage_info vms_using_thin2 other_storage snapshots

  if ! check_node "$hostname"; then
    log_error "$hostname is not reachable"
    return 1
  fi

  log_info "Checking thin2 storage usage and VMs using it..."

  # Get detailed storage info (best effort: an empty result is still logged)
  storage_info=$(ssh_node "$hostname" "pvesh get /nodes/$hostname/storage/thin2/status 2>/dev/null") || storage_info=""
  log_info "thin2 Storage Status:"
  echo "$storage_info" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  # Find VMs/containers using thin2
  log_info "Finding VMs/containers using thin2 storage..."

  vms_using_thin2=$(ssh_node "$hostname" bash <<'ENDSSH'
disk_re='^(scsi|virtio|ide|sata)[0-9]+: *(file=)?thin2:'
rootfs_re='^rootfs: *(volume=)?thin2:'

echo "=== QEMU VMs on thin2 ==="
for vmid in $(qm list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  cfg=$(qm config "$vmid" 2>/dev/null)
  if grep -Eq "$disk_re" <<<"$cfg"; then
    name=$(sed -n 's/^name:[[:space:]]*//p' <<<"$cfg" | head -1)
    status=$(qm status "$vmid" 2>/dev/null | awk '{print $2}')
    echo "VMID: $vmid | Name: ${name:-VM-$vmid} | Status: $status"
  fi
done

echo ""
echo "=== LXC Containers on thin2 ==="
for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  cfg=$(pct config "$vmid" 2>/dev/null)
  if grep -Eq "$rootfs_re" <<<"$cfg"; then
    name=$(sed -n 's/^hostname:[[:space:]]*//p' <<<"$cfg" | head -1)
    status=$(pct status "$vmid" 2>/dev/null | awk '{print $2}')
    echo "VMID: $vmid | Name: ${name:-CT-$vmid} | Status: $status"
  fi
done
ENDSSH
  ) || log_warn "Could not list guests on $hostname (exit status $?)"

  echo "$vms_using_thin2" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  # Check available space in other thin pools
  log_info "Checking available space in other storage pools on r630-02..."

  other_storage=$(ssh_node "$hostname" "pvesm status 2>/dev/null | grep -E 'thin[1-6]|thin1-r630-02' | grep -v thin2") || other_storage=""
  log_info "Available storage pools:"
  echo "$other_storage" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  # Get snapshots on thin2
  log_info "Checking for snapshots on thin2..."

  snapshots=$(ssh_node "$hostname" bash <<'ENDSSH'
disk_re='^(scsi|virtio|ide|sata)[0-9]+: *(file=)?thin2:'
rootfs_re='^rootfs: *(volume=)?thin2:'

echo "=== Snapshots on thin2 ==="
for vmid in $(qm list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if qm config "$vmid" 2>/dev/null | grep -Eq "$disk_re"; then
    snap_list=$(qm listsnapshot "$vmid" 2>/dev/null | tail -n +2)
    if [ -n "$snap_list" ]; then
      echo "VM $vmid snapshots:"
      echo "$snap_list" | head -10
    fi
  fi
done

for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if pct config "$vmid" 2>/dev/null | grep -Eq "$rootfs_re"; then
    snap_list=$(pct listsnapshot "$vmid" 2>/dev/null | tail -n +2)
    if [ -n "$snap_list" ]; then
      echo "CT $vmid snapshots:"
      echo "$snap_list" | head -10
    fi
  fi
done
ENDSSH
  ) || log_warn "Could not list snapshots on $hostname (exit status $?)"

  echo "$snapshots" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  log_success "thin2 investigation complete. Review output above for cleanup/migration opportunities."
}

#######################################
# Action 2: Activate inactive storage pools.
# Despite the name this only inspects: it logs the status, configuration and
# underlying LVM objects of thin1 on ml110 and of data/thin1 on r630-02, and
# reports whether the backing volume group exists.
# Globals:   ACTION_LOG (appended to)
# Returns:   0; unreachable nodes are logged and skipped
# NOTE(review): the `grep -oP` extractions of `enable`/`vgname` assume pvesh
# prints "key: value"-style text - confirm against the pvesh output format.
#######################################
activate_inactive_storage() {
  log_section "Action 2: Activating Inactive Storage Pools"

  local hostname storage
  local thin1_status storage_config vg_info
  local storage_status lv_info enabled vg_name quoted_vg

  # Activate thin1 on ml110
  log_info "Checking thin1 on ml110..."
  hostname="ml110"

  if check_node "$hostname"; then
    thin1_status=$(ssh_node "$hostname" "pvesh get /nodes/$hostname/storage/thin1/status 2>/dev/null") || thin1_status=""
    log_info "thin1 (ml110) current status:"
    echo "$thin1_status" | tee -a "$ACTION_LOG"

    # Check if it's a node restriction issue
    storage_config=$(ssh_node "$hostname" "pvesh get /nodes/$hostname/storage/thin1 2>/dev/null | grep -E 'nodes|content' || echo ''") || storage_config=""
    log_info "Storage configuration:"
    echo "$storage_config" | tee -a "$ACTION_LOG"

    # Try to activate by checking LVM
    log_info "Checking underlying LVM volume group..."
    vg_info=$(ssh_node "$hostname" "vgs 2>/dev/null | grep -E 'thin|VG' || echo 'No VGs found'") || vg_info=""
    echo "$vg_info" | tee -a "$ACTION_LOG"

    log_warn "thin1 on ml110 appears to be configured but inactive. May need manual investigation."
  else
    log_error "ml110 is not reachable"
  fi

  echo "" | tee -a "$ACTION_LOG"

  # Activate data and thin1 on r630-02
  log_info "Checking data and thin1 on r630-02..."
  hostname="r630-02"

  if check_node "$hostname"; then
    for storage in "data" "thin1"; do
      log_info "Checking $storage on $hostname..."
      storage_status=$(ssh_node "$hostname" "pvesh get /nodes/$hostname/storage/$storage/status 2>/dev/null") || storage_status=""
      log_info "$storage ($hostname) current status:"
      echo "$storage_status" | tee -a "$ACTION_LOG"

      # Check LVM status
      log_info "Checking underlying LVM for $storage..."
      lv_info=$(ssh_node "$hostname" "lvs 2>/dev/null | grep -E '$storage|LV' || echo 'No LVs found'") || lv_info=""
      echo "$lv_info" | tee -a "$ACTION_LOG"

      # Check if storage is enabled but inactive
      enabled=$(ssh_node "$hostname" "pvesh get /nodes/$hostname/storage/$storage 2>/dev/null | grep -oP 'enable.*?:\s*\K[^,}]+' | head -1") || enabled=""
      if [[ "$enabled" == "1" || "$enabled" == "true" ]]; then
        log_info "$storage is enabled but inactive. Checking if we can activate..."

        # Try to activate by checking if volume group exists
        vg_name=$(ssh_node "$hostname" "pvesh get /nodes/$hostname/storage/$storage 2>/dev/null | grep -oP 'vgname.*?:\s*\K[^,}]+' | head -1") || vg_name=""
        if [[ -n "$vg_name" ]]; then
          log_info "Volume group for $storage: $vg_name"
          # Decide on the exit status of `vgs <name>`. Counting matches with
          # `grep -c ... || echo 0` printed "0" twice for a missing group, so
          # the result never compared equal to "0".
          printf -v quoted_vg '%q' "$vg_name"
          if ssh_node "$hostname" "vgs $quoted_vg >/dev/null 2>&1"; then
            log_info "Volume group exists. Storage should be activatable."
          else
            log_warn "Volume group $vg_name does not exist. Storage cannot be activated."
          fi
        fi
      fi
      echo "" | tee -a "$ACTION_LOG"
    done
  else
    log_error "r630-02 is not reachable"
  fi

  log_success "Storage activation check complete."
}

#######################################
# Action 3: Identify CPU-intensive workloads on ml110.
# Logs per-guest CPU/memory figures for running VMs and containers, the
# host-level `top` header, and the `qm list` / `pct list` tables.
# Globals:   ACTION_LOG (appended to)
# Returns:   1 if ml110 is not reachable, 0 otherwise
#######################################
identify_cpu_intensive_workloads() {
  log_section "Action 3: Identifying CPU-Intensive Workloads on ml110"

  local hostname="ml110"
  local cpu_usage resource_allocation

  if ! check_node "$hostname"; then
    log_error "$hostname is not reachable"
    return 1
  fi

  log_info "Getting CPU usage for all VMs and containers on ml110..."

  cpu_usage=$(ssh_node "$hostname" bash <<'ENDSSH'
echo "=== QEMU VMs CPU Usage ==="
for vmid in $(qm list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  # Plain `qm status` only prints the state; --verbose adds the cpu field.
  vm_status=$(qm status "$vmid" --verbose 2>/dev/null)
  if grep -q "status: running" <<<"$vm_status"; then
    cfg=$(qm config "$vmid" 2>/dev/null)
    name=$(sed -n 's/^name:[[:space:]]*//p' <<<"$cfg" | head -1)
    # NOTE(review): `cpu` is taken to be a 0..1 fraction and scaled to a
    # percentage - confirm against the Proxmox API documentation.
    cpu_percent=$(awk '$1 == "cpu:" {printf "%.1f", $2 * 100; exit}' <<<"$vm_status")
    cpus=$(sed -n 's/^cores:[[:space:]]*//p' <<<"$cfg" | head -1)
    mem=$(sed -n 's/^memory:[[:space:]]*//p' <<<"$cfg" | head -1)
    echo "VMID: $vmid | Name: ${name:-VM-$vmid} | CPUs: ${cpus:-1} | CPU%: ${cpu_percent:-0} | Memory: ${mem:-0}"
  fi
done

echo ""
echo "=== LXC Containers CPU Usage ==="
for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if pct status "$vmid" 2>/dev/null | grep -q "status: running"; then
    cfg=$(pct config "$vmid" 2>/dev/null)
    name=$(sed -n 's/^hostname:[[:space:]]*//p' <<<"$cfg" | head -1)
    # Get CPU usage from inside container if possible. stdin is detached
    # because this script itself arrives on stdin and `pct exec` would
    # otherwise be able to consume the rest of it.
    cpu_info=$(pct exec "$vmid" -- top -bn1 </dev/null 2>/dev/null | grep "Cpu(s)" | awk '{print $2}' | sed 's/%us,//')
    cpus=$(sed -n 's/^cores:[[:space:]]*//p' <<<"$cfg" | head -1)
    memory=$(sed -n 's/^memory:[[:space:]]*//p' <<<"$cfg" | head -1)
    echo "VMID: $vmid | Name: ${name:-CT-$vmid} | CPUs: ${cpus:-1} | CPU Info: ${cpu_info:-N/A} | Memory: ${memory:-0}"
  fi
done

echo ""
echo "=== Top CPU Consumers (Host Level) ==="
top -bn1 | head -20
ENDSSH
  ) || log_warn "Could not collect CPU usage on $hostname (exit status $?)"

  echo "$cpu_usage" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  # Get detailed VM/container list with resource allocation
  log_info "Getting detailed resource allocation..."

  resource_allocation=$(ssh_node "$hostname" bash <<'ENDSSH'
echo "=== Resource Allocation Summary ==="
echo ""
echo "QEMU VMs:"
qm list 2>/dev/null | head -1
qm list 2>/dev/null | tail -n +2 | awk '{printf "%-6s %-30s %-10s %-10s\n", $1, $2, $3, $4}'

echo ""
echo "LXC Containers:"
pct list 2>/dev/null | head -1
pct list 2>/dev/null | tail -n +2 | awk '{printf "%-6s %-30s %-10s %-10s\n", $1, $2, $3, $4}'
ENDSSH
  ) || log_warn "Could not collect resource allocation on $hostname (exit status $?)"

  echo "$resource_allocation" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  log_success "CPU-intensive workload identification complete."
}

#######################################
# Action 4: Check migration readiness.
# For each target node (r630-01, r630-02) logs CPU count/usage, memory
# figures, the available space of every active storage and the number of
# guests already hosted there.
# Globals:   ACTION_LOG (appended to)
# Returns:   0; unreachable nodes are logged and skipped
#######################################
check_migration_readiness() {
  log_section "Action 4: Checking Migration Readiness"

  log_info "Checking available resources on target nodes..."

  local hostname resources
  for hostname in "r630-01" "r630-02"; do
    if ! check_node "$hostname"; then
      log_error "$hostname is not reachable"
      continue
    fi

    log_info "Resources on $hostname:"

    resources=$(ssh_node "$hostname" bash <<'ENDSSH'
echo "CPU Cores: $(nproc)"
echo "CPU Usage: $(top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | sed 's/%us,//')"
echo "Memory Total: $(free -h | grep Mem | awk '{print $2}')"
echo "Memory Used: $(free -h | grep Mem | awk '{print $3}')"
echo "Memory Available: $(free -h | grep Mem | awk '{print $7}')"
echo "Memory Usage: $(free | grep Mem | awk '{printf "%.1f%%", $3/$2 * 100.0}')"
echo ""
echo "Storage Available:"
# pvesm status columns: Name Type Status Total Used Available %.
# Match the Status column exactly (a plain grep for "active" also matches
# "inactive") and print Available (column 6), not Used (column 5).
pvesm status 2>/dev/null | awk '$3 == "active" {printf " %-20s %8s available\n", $1, $6}'
echo ""
echo "Current VMs/Containers:"
echo " QEMU VMs: $(qm list 2>/dev/null | tail -n +2 | wc -l)"
echo " LXC Containers: $(pct list 2>/dev/null | tail -n +2 | wc -l)"
ENDSSH
    ) || log_warn "Could not collect resources on $hostname (exit status $?)"

    echo "$resources" | tee -a "$ACTION_LOG"
    echo "" | tee -a "$ACTION_LOG"
  done

  log_success "Migration readiness check complete."
}

#######################################
# Main execution: run every action and summarise the result.
# Each action runs even if an earlier one failed (for example because its
# node was unreachable); under `set -e` a bare call would otherwise abort the
# script at the first failing action.
# Globals:   ACTION_LOG (appended to)
# Returns:   0 if every action succeeded, 1 if at least one reported failure
#######################################
main() {
  log_header "Executing Immediate Actions from Hardware/Storage Investigation"
  echo "Log file: $ACTION_LOG" | tee -a "$ACTION_LOG"
  echo "Timestamp: $(date)" | tee -a "$ACTION_LOG"
  echo "" | tee -a "$ACTION_LOG"

  # Execute all actions
  local action
  local failures=0
  for action in \
    investigate_thin2_capacity \
    activate_inactive_storage \
    identify_cpu_intensive_workloads \
    check_migration_readiness; do
    if ! "$action"; then
      log_warn "$action did not complete successfully; continuing with remaining actions"
      failures=$((failures + 1))
    fi
  done

  log_header "Immediate Actions Execution Complete"
  log_info "Full log saved to: $ACTION_LOG"
  log_info "Review the log for detailed information and next steps."
  echo ""

  if ((failures > 0)); then
    log_warn "$failures action(s) did not complete successfully. See the log for details."
    return 1
  fi
  log_success "All immediate actions have been executed!"
}

# Run main function, forwarding the script's arguments.
main "$@"