Files
proxmox/scripts/repair-thin-storage.sh
defiQUG cb47cce074 Complete markdown files cleanup and organization
- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
2026-01-06 01:46:25 -08:00

314 lines
11 KiB
Bash
Executable File

#!/usr/bin/env bash
# Repair and redeploy thin1-thin3 storage pools on pve and pve2
# This script removes broken storage configurations and recreates them properly
#
# Optional environment overrides (defaults are the values this script has
# always used, so running it with a clean environment behaves as before):
#   PROXMOX_HOST_PVE   address used for node "pve"
#   PROXMOX_HOST_PVE2  address used for node "pve2"
#   PVE_PASS           root SSH password passed to sshpass
set -euo pipefail
PROXMOX_HOST_PVE="${PROXMOX_HOST_PVE:-192.168.11.11}"
PROXMOX_HOST_PVE2="${PROXMOX_HOST_PVE2:-192.168.11.12}"
# NOTE(review): supply the real password through the environment rather than
# editing it into this file; the literal default is kept only for compatibility.
PVE_PASS="${PVE_PASS:-password}"
# Names of the Proxmox storage entries this script repairs.
STORAGE_POOLS=("thin1" "thin2" "thin3")
# ANSI color sequences. They are stored as literal backslash text and only
# turned into escape bytes by `echo -e` when a message is printed.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Print one tagged, colored line on stdout.
#   $1 - color sequence, $2 - tag text (without brackets), $3 - message
_log_tagged() {
  echo -e "$1[$2]${NC} $3"
}

# Severity helpers; each takes the message as $1.
log_info() {
  _log_tagged "${BLUE}" "INFO" "$1"
}

log_success() {
  _log_tagged "${GREEN}" "✓" "$1"
}

log_warn() {
  _log_tagged "${YELLOW}" "WARN" "$1"
}

log_error() {
  _log_tagged "${RED}" "ERROR" "$1"
}
# Check whether a storage pool currently holds any volumes.
# Arguments: $1 - host to SSH into as root, $2 - storage name
# Globals:   PVE_PASS (read)
# Returns:   0 when `pvesm list` reports nothing beyond its header line,
#            1 when it reports items OR when the count cannot be determined
#            (SSH failure, non-numeric output). An unknown state is treated as
#            "may be in use" so a connection problem is never read as "empty".
check_storage_usage() {
  local host=$1
  local storage=$2
  local usage
  log_info "Checking if $storage is in use on $host..."
  # Declared separately from the assignment so the SSH exit status is visible.
  if ! usage=$(sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "pvesm list $storage 2>/dev/null | wc -l"); then
    log_warn "Could not query storage $storage on $host; assuming it may be in use"
    return 1
  fi
  # Drop any padding around the count before validating it.
  usage=${usage//[[:space:]]/}
  if [[ ! $usage =~ ^[0-9]+$ ]]; then
    log_warn "Unexpected item count '$usage' for storage $storage; assuming it may be in use"
    return 1
  fi
  # The first line of the listing is the header, hence "> 1".
  if [ "$usage" -gt 1 ]; then
    log_warn "Storage $storage has $((usage - 1)) items (header excluded)"
    return 1
  fi
  log_info "Storage $storage appears to be unused"
  return 0
}
# Remove a storage definition from the Proxmox configuration on a host.
# Arguments: $1 - host to SSH into as root, $2 - storage name
# Globals:   PVE_PASS (read)
# Returns:   always 0. The caller invokes this as a plain statement under
#            `set -e`, so failures are reported through log messages only.
remove_storage() {
  local host=$1
  local storage=$2
  log_info "Removing storage $storage from Proxmox on $host..."
  # Compare the first column of `pvesm status` exactly; a substring match
  # would treat "thin1" as present when only "thin10" exists.
  if ! sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "pvesm status 2>/dev/null | awk -v name='$storage' '\$1 == name { found = 1 } END { exit !found }'" 2>/dev/null; then
    log_info "Storage $storage does not exist, skipping removal"
    return 0
  fi
  # Preferred path: let pvesm drop the entry.
  if ! sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "pvesm remove $storage 2>&1"; then
    log_warn "Failed to remove via pvesm, trying to edit config directly..."
    # Fallback: delete the "lvmthin: <name>" section up to the next blank line.
    if ! sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
      "sed -i '/^lvmthin: $storage$/,/^$/d' /etc/pve/storage.cfg 2>/dev/null"; then
      log_warn "Could not edit /etc/pve/storage.cfg on $host; storage $storage may still be configured"
      return 0
    fi
  fi
  log_success "Storage $storage removed from Proxmox configuration"
}
# Print the `vgname` configured for an lvmthin storage entry.
# Arguments: $1 - host to SSH into as root, $2 - storage name
# Globals:   PVE_PASS (read)
# Outputs:   the volume group name on stdout; nothing (or an empty line when
#            the remote command fails) if the entry or its vgname is missing.
# The remote awk only looks inside the section whose header line is exactly
# "lvmthin: <name>" and stops at the next unindented line, so "thin1" neither
# matches "thin10" nor picks up a vgname from the following section.
get_vg_for_storage() {
  local host=$1
  local storage=$2
  sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "awk -v hdr='lvmthin: $storage' '\$0 == hdr { in_sec = 1; next } /^[^ \t]/ { in_sec = 0 } in_sec && \$1 == \"vgname\" { print \$2; exit }' /etc/pve/storage.cfg 2>/dev/null" 2>/dev/null || echo ""
}
# Print the thin pool name configured for an lvmthin storage entry.
# Arguments: $1 - host to SSH into as root, $2 - storage name
# Globals:   PVE_PASS (read)
# Outputs:   the pool name on stdout (an empty line if none is configured).
#            A value written as "vg/pool" is reduced to the part after the
#            first slash.
# The remote awk only looks inside the section whose header line is exactly
# "lvmthin: <name>" and stops at the next unindented line, so "thin1" neither
# matches "thin10" nor picks up a thinpool from the following section.
get_thinpool_for_storage() {
  local host=$1
  local storage=$2
  local pool
  pool=$(sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "awk -v hdr='lvmthin: $storage' '\$0 == hdr { in_sec = 1; next } /^[^ \t]/ { in_sec = 0 } in_sec && \$1 == \"thinpool\" { print \$2; exit }' /etc/pve/storage.cfg 2>/dev/null" 2>/dev/null) || pool=""
  # If pool is in vg/pool format, keep just the pool name.
  if [[ "$pool" == */* ]]; then
    pool=${pool#*/}
  fi
  echo "$pool"
}
# Report whether a volume group is visible on a host.
# Arguments: $1 - host to SSH into as root, $2 - volume group name
# Globals:   PVE_PASS (read)
# Returns:   the exit status of the remote `vgs | grep -q` pipeline as relayed
#            by ssh/sshpass (0 when the name appears in the vgs output).
check_vg_exists() {
  local host=$1 vg_name=$2
  # Remote probe: list the VG and look for its name in the output.
  local probe="vgs $vg_name 2>/dev/null | grep -q '$vg_name'"
  local -a ssh_argv=(ssh -o StrictHostKeyChecking=accept-new "root@${host}")
  sshpass -p "$PVE_PASS" "${ssh_argv[@]}" "$probe" 2>/dev/null
}
# Report whether a logical volume named like the thin pool exists in a VG.
# Arguments: $1 - host to SSH into as root, $2 - volume group, $3 - pool name
# Globals:   PVE_PASS (read)
# Returns:   the exit status of the remote `lvs | grep -q` pipeline as relayed
#            by ssh/sshpass (0 when the pool name appears in the lvs output).
check_thinpool_exists() {
  local host=$1 vg_name=$2 pool_name=$3
  # Remote probe: list vg/pool and look for the pool name in the output.
  local probe="lvs $vg_name/$pool_name 2>/dev/null | grep -q '$pool_name'"
  local -a ssh_argv=(ssh -o StrictHostKeyChecking=accept-new "root@${host}")
  sshpass -p "$PVE_PASS" "${ssh_argv[@]}" "$probe" 2>/dev/null
}
# Create a thin pool inside an existing volume group if it is not there yet.
# Arguments: $1 - host to SSH into as root, $2 - volume group, $3 - pool name
# Globals:   PVE_PASS (read)
# Returns:   0 if the pool already exists or was created; 1 if the VG is
#            missing, fewer than 10G are free, or the LVM commands fail.
create_thin_pool() {
  local host=$1
  local vg_name=$2
  local pool_name=$3
  local raw_free vg_free pool_size
  log_info "Creating thin pool $vg_name/$pool_name on $host..."
  # Check if VG exists
  if ! check_vg_exists "$host" "$vg_name"; then
    log_error "Volume group $vg_name does not exist on $host"
    return 1
  fi
  # Check if pool already exists
  if check_thinpool_exists "$host" "$vg_name" "$pool_name"; then
    log_info "Thin pool $vg_name/$pool_name already exists"
    return 0
  fi
  # Get available space in GiB. The value is parsed locally: LVM may print a
  # "<" in front of rounded sizes (e.g. "<50.00g"), which a bare awk int()
  # turns into 0, so strip everything except digits and the decimal separator
  # and keep the integer part.
  raw_free=$(sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "vgs -o vg_free --noheadings --nosuffix --units g $vg_name 2>/dev/null") || raw_free=""
  raw_free=${raw_free//[^0-9.,]/}
  vg_free=${raw_free%%[.,]*}
  # Anything unparsable (empty output, SSH failure) counts as no free space.
  [[ $vg_free =~ ^[0-9]+$ ]] || vg_free=0
  if [ "$vg_free" -lt 10 ]; then
    log_error "Not enough free space in $vg_name (${vg_free}G available)"
    return 1
  fi
  # Use 80% of available space
  pool_size=$((vg_free * 80 / 100))
  log_info "Creating thin pool with ${pool_size}G (80% of ${vg_free}G free)"
  # Run both steps as one remote command: `&&` keeps lvconvert from running
  # after a failed lvcreate, and --yes answers lvconvert's confirmation
  # prompt, which cannot be answered in this non-interactive session.
  if sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "lvcreate -L ${pool_size}G -n ${pool_name} ${vg_name} 2>&1 && lvconvert --yes --type thin-pool ${vg_name}/${pool_name} 2>&1"; then
    log_success "Thin pool $vg_name/$pool_name created"
    return 0
  fi
  log_error "Failed to create thin pool"
  return 1
}
# Register an lvmthin storage entry in Proxmox, creating its thin pool first
# when the pool does not exist.
# Arguments: $1 - host to SSH into as root, $2 - storage name,
#            $3 - volume group, $4 - thin pool name, $5 - node name for --nodes
# Globals:   PVE_PASS (read)
# Returns:   0 if the storage was added, or if the add failed but the storage
#            is listed by `pvesm status` afterwards; 1 otherwise.
recreate_storage() {
  local host=$1
  local storage=$2
  local vg_name=$3
  local pool_name=$4
  local node_name=$5
  log_info "Recreating storage $storage on $host..."
  # Create thin pool if needed
  if ! check_thinpool_exists "$host" "$vg_name" "$pool_name"; then
    if ! create_thin_pool "$host" "$vg_name" "$pool_name"; then
      log_error "Cannot recreate storage without thin pool"
      return 1
    fi
  fi
  # Add storage to Proxmox
  # Note: thinpool parameter should be just the pool name, not vg/pool
  if sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "pvesm add lvmthin $storage --thinpool ${pool_name} --vgname ${vg_name} --content images,rootdir --nodes ${node_name} 2>&1"; then
    log_success "Storage $storage recreated successfully"
    return 0
  fi
  log_warn "Storage add command returned non-zero, checking if it exists..."
  # Compare the first column of `pvesm status` exactly; a substring match
  # would report success for "thin1" when only "thin10" is configured.
  if sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "pvesm status 2>/dev/null | awk -v name='$storage' '\$1 == name { found = 1 } END { exit !found }'" 2>/dev/null; then
    log_success "Storage $storage exists (may have already been configured)"
    return 0
  fi
  log_error "Failed to recreate storage"
  return 1
}
# Repair every pool listed in STORAGE_POOLS on one node: read its current
# VG/pool names, remove the storage entry, recreate it, then print the
# resulting status lines.
# Arguments: $1 - host to SSH into as root, $2 - Proxmox node name
# Globals:   STORAGE_POOLS (read), PVE_PASS (read)
# Returns:   0; per-pool failures are logged and the loop continues.
process_node() {
  local host=$1
  local node_name=$2
  # Loop variable and scratch values are local so nothing leaks to callers.
  local storage vg_name pool_name status_pattern
  log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log_info "Processing node: $node_name ($host)"
  log_info "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  echo ""
  # Get current VG status (informational only, so failures are ignored)
  log_info "Current volume groups:"
  sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" "vgs 2>/dev/null" || true
  echo ""
  # Process each storage pool
  for storage in "${STORAGE_POOLS[@]}"; do
    log_info "Processing storage: $storage"
    # Get VG and pool names from config (before removal). The `|| ...=""`
    # guards keep `set -e` from aborting if a lookup returns non-zero.
    vg_name=$(get_vg_for_storage "$host" "$storage") || vg_name=""
    pool_name=$(get_thinpool_for_storage "$host" "$storage") || pool_name=""
    if [ -z "$vg_name" ]; then
      # Use storage name as VG name (common pattern)
      vg_name="$storage"
      pool_name="$storage"
      log_info "Using default: VG=$vg_name, Pool=$pool_name"
    else
      if [ -z "$pool_name" ]; then
        # A vgname without a thinpool would otherwise pass an empty pool
        # name to the LVM commands; fall back to the same default as above.
        pool_name="$storage"
        log_warn "No thin pool configured for $storage; defaulting pool name to $pool_name"
      fi
      log_info "Found config: VG=$vg_name, Pool=$pool_name"
    fi
    # Check if storage is in use
    if ! check_storage_usage "$host" "$storage"; then
      log_warn "Storage $storage may be in use. Proceeding with caution..."
    fi
    # Remove storage
    remove_storage "$host" "$storage"
    # Recreate storage
    if recreate_storage "$host" "$storage" "$vg_name" "$pool_name" "$node_name"; then
      log_success "Storage $storage repaired on $node_name"
    else
      log_error "Failed to repair storage $storage on $node_name"
    fi
    echo ""
  done
  # Show final status. The filter is built from STORAGE_POOLS so it always
  # matches the pools that were actually processed.
  status_pattern=$(IFS='|'; printf '%s' "${STORAGE_POOLS[*]}")
  log_info "Final storage status:"
  sshpass -p "$PVE_PASS" ssh -o StrictHostKeyChecking=accept-new "root@${host}" \
    "pvesm status | grep -E '(${status_pattern})'" || true
  echo ""
}
# Script driver: print what is about to happen, ask for confirmation when
# attached to a terminal (unless NON_INTERACTIVE=1), then repair pve and pve2
# in turn, skipping any node that does not answer an SSH probe.
# Globals: PVE_PASS, PROXMOX_HOST_PVE, PROXMOX_HOST_PVE2, NON_INTERACTIVE (read)
main() {
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  # Shared prefix of the reachability probe; the target and command follow.
  local -a probe=(sshpass -p "$PVE_PASS" ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new)

  echo ""
  log_info "$rule"
  log_info "Repair and Redeploy Thin Storage Pools"
  log_info "$rule"
  echo ""
  log_warn "This script will:"
  log_warn " 1. Remove thin1, thin2, thin3 storage configurations"
  log_warn " 2. Recreate them properly"
  log_warn " 3. Ensure they are properly configured"
  echo ""

  # Prompt only when not forced non-interactive AND stdin is a terminal.
  if [[ "${NON_INTERACTIVE:-}" != "1" ]] && [[ -t 0 ]]; then
    read -p "Continue? (y/N): " -n 1 -r
    echo ""
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
      log_info "Operation cancelled"
      exit 0
    fi
  else
    log_info "Non-interactive mode: proceeding automatically"
  fi

  # Node pve: an unreachable host is reported as an error.
  if ! "${probe[@]}" "root@${PROXMOX_HOST_PVE}" "echo 'connected'" 2>/dev/null; then
    log_error "Cannot connect to pve"
  else
    process_node "$PROXMOX_HOST_PVE" "pve"
  fi

  # Node pve2: an unreachable host is only a warning.
  if ! "${probe[@]}" "root@${PROXMOX_HOST_PVE2}" "echo 'connected'" 2>/dev/null; then
    log_warn "Cannot connect to pve2, skipping..."
  else
    process_node "$PROXMOX_HOST_PVE2" "pve2"
  fi

  echo ""
  log_info "$rule"
  log_info "Repair Complete"
  log_info "$rule"
  echo ""
  log_info "Storage pools have been repaired and redeployed."
  log_info "Verify status with: ssh root@<node> 'pvesm status'"
  echo ""
}
# Script entry point: run main, forwarding every command-line argument.
main "$@"