#!/usr/bin/env bash
# Repair and redeploy thin1-thin3 storage pools on pve and pve2
# This script removes broken storage configurations and recreates them properly

set -euo pipefail

# Load IP configuration from the project's config directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# shellcheck disable=SC1091 -- config file is environment-specific, may be absent
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Fail with a clear message (instead of set -u's terse "unbound variable")
# when the config file was missing or did not define the host addresses.
PROXMOX_HOST_PVE="${PROXMOX_HOST_R630_01:?PROXMOX_HOST_R630_01 not set - check config/ip-addresses.conf}"
PROXMOX_HOST_PVE2="${PROXMOX_HOST_R630_02:?PROXMOX_HOST_R630_02 not set - check config/ip-addresses.conf}"

# SECURITY: allow the password to come from the environment instead of being
# hardcoded; the literal default preserves previous behavior.
# TODO: switch to SSH key auth and drop sshpass entirely.
PVE_PASS="${PVE_PASS:-password}"

# Storage pools this script repairs, in processing order.
readonly STORAGE_POOLS=("thin1" "thin2" "thin3")
|
|
|
|
|
|
|
|
|
|
# ANSI color escape sequences for log output (NC = no color / reset).
# readonly guards against accidental reassignment later in the script.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'
|
|
|
|
|
|
|
|
|
|
# Logging helpers. Informational/success messages go to stdout; warnings and
# errors go to stderr so they survive stdout redirection and pipelines.
log_info()    { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
log_warn()    { echo -e "${YELLOW}[WARN]${NC} $1" >&2; }
log_error()   { echo -e "${RED}[ERROR]${NC} $1" >&2; }
|
|
|
|
|
|
|
|
|
|
# Check what's using a storage pool.
# Globals:   PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - storage pool name
# Returns:   0 if the pool appears unused, 1 if it has items.
check_storage_usage() {
    local host=$1
    local storage=$2
    local usage

    log_info "Checking if $storage is in use on $host..."

    # 'pvesm list' prints a header line, so (count - 1) = number of items.
    # Declaration is split from assignment so a failed ssh is not masked
    # by 'local' always returning 0.
    usage=$(sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
        "pvesm list $storage 2>/dev/null | wc -l" 2>/dev/null) || usage=0

    # Guard against non-numeric output (e.g. ssh banner noise) so the
    # numeric comparison below cannot blow up under 'set -e'.
    if ! [[ "$usage" =~ ^[0-9]+$ ]]; then
        usage=0
    fi

    if [ "$usage" -gt 1 ]; then
        log_warn "Storage $storage has $((usage - 1)) items (header excluded)"
        return 1
    else
        log_info "Storage $storage appears to be unused"
        return 0
    fi
}
|
|
|
|
|
|
|
|
|
|
# Remove storage from Proxmox.
# Removes the storage *definition* only — the underlying LVM thin pool and
# its data are left intact.
# Globals:   PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - storage pool name
# Returns:   0 (always succeeds or degrades to a warning)
remove_storage() {
    local host=$1
    local storage=$2

    log_info "Removing storage $storage from Proxmox on $host..."

    # Check if storage exists.
    # NOTE(review): grep -q '$storage' is a substring match — 'thin1' would
    # also match a storage named 'thin10'; confirm pool naming is unambiguous.
    if ! sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new root@${host} \
        "pvesm status 2>/dev/null | grep -q '$storage'" 2>/dev/null; then
        log_info "Storage $storage does not exist, skipping removal"
        return 0
    fi

    # Remove from Proxmox config via the supported CLI first.
    sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new root@${host} \
        "pvesm remove $storage 2>&1" || {
        log_warn "Failed to remove via pvesm, trying to edit config directly..."
        # Remove from storage.cfg: delete the 'lvmthin: <name>' stanza up to
        # the next blank line. $storage expands locally (inside the remote
        # command's double quotes) before ssh runs it.
        sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new root@${host} \
            "sed -i '/^lvmthin: $storage$/,/^$/d' /etc/pve/storage.cfg 2>/dev/null" || true
    }

    # Success is logged even when only the sed fallback ran — best-effort
    # removal is intentional here (script continues to recreation).
    log_success "Storage $storage removed from Proxmox configuration"
}
|
|
|
|
|
|
|
|
|
|
# Get volume group name for storage.
# Prints the 'vgname' field of the pool's lvmthin stanza in storage.cfg,
# or an empty string if not found / the query fails.
# Globals:   PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - storage pool name
get_vg_for_storage() {
    local host=$1
    local storage=$2

    # grep reads the file directly (no useless 'cat file | grep').
    # NOTE(review): '-A 5' assumes the stanza spans at most 6 lines — confirm.
    sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
        "grep -A 5 'lvmthin: $storage' /etc/pve/storage.cfg 2>/dev/null | grep 'vgname' | awk '{print \$2}'" 2>/dev/null || echo ""
}
|
|
|
|
|
|
|
|
|
|
# Get thin pool name for storage.
# Prints the 'thinpool' field of the pool's lvmthin stanza (pool name only,
# even if the config stores it as vg/pool), or empty if not found.
# Globals:   PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - storage pool name
get_thinpool_for_storage() {
    local host=$1
    local storage=$2
    local pool

    # Split declaration from assignment so the ssh exit status is not masked
    # by 'local'; grep reads the file directly (no useless cat).
    pool=$(sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
        "grep -A 5 'lvmthin: $storage' /etc/pve/storage.cfg 2>/dev/null | grep 'thinpool' | awk '{print \$2}'" 2>/dev/null) || pool=""

    # If pool is in vg/pool format, extract just the pool name.
    if [[ "$pool" == *"/"* ]]; then
        pool=$(echo "$pool" | cut -d'/' -f2)
    fi

    echo "$pool"
}
|
|
|
|
|
|
|
|
|
|
# Check if VG exists.
# Succeeds iff volume group $2 is present on host $1.
check_vg_exists() {
    local node=$1
    local vg=$2

    sshpass -p "$PVE_PASS" \
        ssh -o StrictHostKeyChecking=accept-new "root@${node}" \
        "vgs $vg 2>/dev/null | grep -q '$vg'" 2>/dev/null
}
|
|
|
|
|
|
|
|
|
|
# Check if thin pool exists in VG.
# Succeeds iff logical volume $3 exists inside volume group $2 on host $1.
check_thinpool_exists() {
    local node=$1
    local vg=$2
    local pool=$3

    sshpass -p "$PVE_PASS" \
        ssh -o StrictHostKeyChecking=accept-new "root@${node}" \
        "lvs $vg/$pool 2>/dev/null | grep -q '$pool'" 2>/dev/null
}
|
|
|
|
|
|
|
|
|
|
# Create thin pool if it doesn't exist.
# Sizes the pool to 80% of the VG's free space; no-op if it already exists.
# Globals:   PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - volume group name
#            $3 - thin pool name
# Returns:   0 on success or already-exists, 1 on failure.
create_thin_pool() {
    local host=$1
    local vg_name=$2
    local pool_name=$3
    local vg_free pool_size

    log_info "Creating thin pool $vg_name/$pool_name on $host..."

    # Check if VG exists
    if ! check_vg_exists "$host" "$vg_name"; then
        log_error "Volume group $vg_name does not exist on $host"
        return 1
    fi

    # Check if pool already exists
    if check_thinpool_exists "$host" "$vg_name" "$pool_name"; then
        log_info "Thin pool $vg_name/$pool_name already exists"
        return 0
    fi

    # Get available free space in whole GiB. Declaration is split from
    # assignment so a failed ssh is not masked by 'local'.
    vg_free=$(sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
        "vgs -o vg_free --noheadings --units g $vg_name 2>/dev/null | awk '{print int(\$1)}'") || vg_free=0
    vg_free=${vg_free:-0}

    if [ "$vg_free" -lt 10 ]; then
        log_error "Not enough free space in $vg_name (${vg_free}G available)"
        return 1
    fi

    # Use 80% of available space
    pool_size=$((vg_free * 80 / 100))
    log_info "Creating thin pool with ${pool_size}G (80% of ${vg_free}G free)"

    # Test the ssh command directly in the 'if': the previous pattern
    # 'cmd; if [ $? -eq 0 ]' is broken under 'set -e' (a failing cmd aborts
    # the script before the check, making the else-branch unreachable).
    if sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" <<EOF
lvcreate -L ${pool_size}G -n ${pool_name} ${vg_name} 2>&1
lvconvert --type thin-pool ${vg_name}/${pool_name} 2>&1
EOF
    then
        log_success "Thin pool $vg_name/$pool_name created"
        return 0
    else
        log_error "Failed to create thin pool"
        return 1
    fi
}
|
|
|
|
|
|
|
|
|
|
# Recreate storage in Proxmox.
# Ensures the thin pool exists, then registers it as an lvmthin storage.
# Globals:   PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - storage name
#            $3 - volume group name
#            $4 - thin pool name
#            $5 - Proxmox node name (for --nodes)
# Returns:   0 on success (or if the storage already exists), 1 on failure.
recreate_storage() {
    local host=$1
    local storage=$2
    local vg_name=$3
    local pool_name=$4
    local node_name=$5

    log_info "Recreating storage $storage on $host..."

    # Create thin pool if needed
    if ! check_thinpool_exists "$host" "$vg_name" "$pool_name"; then
        if ! create_thin_pool "$host" "$vg_name" "$pool_name"; then
            log_error "Cannot recreate storage without thin pool"
            return 1
        fi
    fi

    # Add storage to Proxmox.
    # Note: thinpool parameter should be just the pool name, not vg/pool.
    # The ssh command is tested directly in the 'if' — the previous
    # 'cmd; if [ $? -eq 0 ]' pattern is broken under 'set -e'.
    if sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" <<EOF
pvesm add lvmthin $storage \
    --thinpool ${pool_name} \
    --vgname ${vg_name} \
    --content images,rootdir \
    --nodes ${node_name} 2>&1
EOF
    then
        log_success "Storage $storage recreated successfully"
        return 0
    fi

    log_warn "Storage add command returned non-zero, checking if it exists..."
    # 'pvesm add' fails when the storage is already defined; treat that
    # as success so reruns are idempotent.
    if sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
        "pvesm status 2>/dev/null | grep -q '$storage'" 2>/dev/null; then
        log_success "Storage $storage exists (may have already been configured)"
        return 0
    fi

    log_error "Failed to recreate storage"
    return 1
}
|
|
|
|
|
|
|
|
|
|
# Process a single node.
# Repairs every pool in STORAGE_POOLS on one Proxmox node: looks up the
# existing VG/thin-pool mapping from storage.cfg, removes the storage
# definition, then recreates it. A failure on one pool is logged and the
# loop continues with the next pool.
# Globals:   STORAGE_POOLS (read), PVE_PASS (read)
# Arguments: $1 - host/IP to ssh to
#            $2 - Proxmox node name (used for --nodes on recreation)
process_node() {
    local host=$1
    local node_name=$2

    log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log_info "Processing node: $node_name ($host)"
    log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo ""

    # Get current VG status (informational only; failure is ignored)
    log_info "Current volume groups:"
    sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new root@${host} "vgs 2>/dev/null" || true
    echo ""

    # Process each storage pool
    for storage in "${STORAGE_POOLS[@]}"; do
        log_info "Processing storage: $storage"

        # Get VG and pool names from config (before removal).
        # NOTE(review): 'local x=$(cmd)' masks the command's exit status;
        # harmless here because empty output is handled just below.
        local vg_name=$(get_vg_for_storage "$host" "$storage")
        local pool_name=$(get_thinpool_for_storage "$host" "$storage")

        if [ -z "$vg_name" ]; then
            # Use storage name as VG name (common pattern)
            vg_name="$storage"
            pool_name="$storage"
            log_info "Using default: VG=$vg_name, Pool=$pool_name"
        else
            log_info "Found config: VG=$vg_name, Pool=$pool_name"
        fi

        # Check if storage is in use — advisory only; the repair proceeds
        # either way.
        if ! check_storage_usage "$host" "$storage"; then
            log_warn "Storage $storage may be in use. Proceeding with caution..."
        fi

        # Remove storage
        remove_storage "$host" "$storage"

        # Recreate storage
        if recreate_storage "$host" "$storage" "$vg_name" "$pool_name" "$node_name"; then
            log_success "Storage $storage repaired on $node_name"
        else
            log_error "Failed to repair storage $storage on $node_name"
        fi

        echo ""
    done

    # Show final status (best-effort; grep finding nothing is not an error)
    log_info "Final storage status:"
    sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new root@${host} \
        "pvesm status | grep -E '(thin1|thin2|thin3)'" || true
    echo ""
}
|
|
|
|
|
|
|
|
|
|
# Main execution: print a banner, confirm with the operator (unless
# non-interactive), then repair storage on pve and pve2 in turn.
main() {
    echo ""
    log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log_info "Repair and Redeploy Thin Storage Pools"
    log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo ""

    log_warn "This script will:"
    log_warn "  1. Remove thin1, thin2, thin3 storage configurations"
    log_warn "  2. Recreate them properly"
    log_warn "  3. Ensure they are properly configured"
    echo ""

    # Prompt only when interactive AND NON_INTERACTIVE is not forced on.
    if [[ "${NON_INTERACTIVE:-}" != "1" && -t 0 ]]; then
        read -r -n 1 -p "Continue? (y/N): "
        echo ""
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            log_info "Operation cancelled"
            exit 0
        fi
    else
        log_info "Non-interactive mode: proceeding automatically"
    fi

    # Process pve
    if ! sshpass -p "$PVE_PASS" ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new root@${PROXMOX_HOST_PVE} "echo 'connected'" 2>/dev/null; then
        log_error "Cannot connect to pve"
    else
        process_node "$PROXMOX_HOST_PVE" "pve"
    fi

    # Process pve2
    if ! sshpass -p "$PVE_PASS" ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new root@${PROXMOX_HOST_PVE2} "echo 'connected'" 2>/dev/null; then
        log_warn "Cannot connect to pve2, skipping..."
    else
        process_node "$PROXMOX_HOST_PVE2" "pve2"
    fi

    echo ""
    log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    log_info "Repair Complete"
    log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
    echo ""
    log_info "Storage pools have been repaired and redeployed."
    log_info "Verify status with: ssh root@<node> 'pvesm status'"
    echo ""
}

main "$@"
|
|
|
|
|
|