Files
proxmox/scripts/diagnose-and-fix-migration-storage.sh
defiQUG cb47cce074 Complete markdown files cleanup and organization
- Organized 252 files across project
- Root directory: 187 → 2 files (98.9% reduction)
- Moved configuration guides to docs/04-configuration/
- Moved troubleshooting guides to docs/09-troubleshooting/
- Moved quick start guides to docs/01-getting-started/
- Moved reports to reports/ directory
- Archived temporary files
- Generated comprehensive reports and documentation
- Created maintenance scripts and guides

All files organized according to established standards.
2026-01-06 01:46:25 -08:00

488 lines
14 KiB
Bash
Executable File

#!/usr/bin/env bash
# Diagnose and Fix Storage Issues for Proxmox Container Migrations
# This script checks storage configuration and fixes issues to enable migrations
#
# Every scalar setting below can be overridden from the environment; the
# literal after ":-" is the fallback used when the variable is unset or empty.
set -euo pipefail

# Configuration
PROXMOX_HOST_ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
PROXMOX_HOST_PVE="${PROXMOX_HOST_PVE:-192.168.11.11}"
PROXMOX_HOST_PVE2="${PROXMOX_HOST_PVE2:-192.168.11.12}"

# SECURITY: the fallback passwords are kept only so existing invocations keep
# working. They live in version control; export ML110_PASS / PVE_PASS /
# PVE2_PASS instead and rotate these values.
ML110_PASS="${ML110_PASS:-L@kers2010}"
PVE_PASS="${PVE_PASS:-password}"
PVE2_PASS="${PVE2_PASS:-password}"

# Containers to migrate
CONTAINERS=(1504 2503 2504 6201)
TARGET_NODE="${TARGET_NODE:-pve}"

# Colors (escape sequences are stored literally and expanded when printed)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'
# Print an informational message on stdout, tagged with a blue [INFO].
# $1 - message text. It is printed with %s so backslashes inside it (for
#      example in captured command output) are shown literally; only the
#      colour variables are expanded through %b.
log_info() { printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "$1"; }
# Print a success message on stdout, tagged with a green check mark.
# $1 - message text, printed literally (%s); only the colour variables go
#      through escape expansion (%b).
log_success() { printf '%b[✓]%b %s\n' "${GREEN}" "${NC}" "$1"; }
# Print a warning on stdout, tagged with a yellow [WARN].
# $1 - message text, printed literally (%s); only the colour variables go
#      through escape expansion (%b).
log_warn() { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$1"; }
# Print an error message on stdout, tagged with a red [ERROR].
# $1 - message text, printed literally (%s); only the colour variables go
#      through escape expansion (%b).
log_error() { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$1"; }
# Draw a cyan horizontal rule on stdout; used above and below section titles.
log_header() {
  local rule='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
  printf '%b\n' "${CYAN}${rule}${NC}"
}
# SSH helper with password
#
# Runs a command as root on a node using password authentication.
# Arguments: $1 - host address
#            $2 - root password
#            $3.. - remote command (passed to ssh unchanged)
# Outputs:   remote stdout and stderr, merged onto stdout
# Returns:   the exit status of the sshpass/ssh invocation
ssh_node() {
  local host=$1
  local pass=$2
  shift 2
  # Hand the password to sshpass through the SSHPASS environment variable
  # (-e) instead of -p, so it is not visible on the command line of the
  # sshpass process.
  SSHPASS="$pass" sshpass -e ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new "root@${host}" "$@" 2>&1
}
# Check if node is reachable
#
# Arguments: $1 - host address
#            $2 - root password
#            $3 - node name used in log messages
# Outputs:   log lines, plus the remote "connected" marker on success
# Returns:   0 when an SSH login succeeds, 1 otherwise
check_node_connectivity() {
  local host=$1
  local pass=$2
  local node_name=$3
  log_info "Checking connectivity to $node_name ($host)..."
  # The password travels in SSHPASS (sshpass -e) rather than as a -p
  # argument, which would expose it on the sshpass command line.
  # ssh's own diagnostics are discarded; only the outcome matters here.
  if SSHPASS="$pass" sshpass -e ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new "root@${host}" "echo 'connected'" 2>/dev/null; then
    log_success "$node_name is reachable"
    return 0
  else
    log_error "$node_name is not reachable"
    return 1
  fi
}
# Show the `pvesm status` table of one node.
# Arguments: $1 - host address, $2 - root password, $3 - node name for logs
# Returns:   0 on success, 1 when the remote command fails
get_storage_status() {
  local node_host="$1" node_pass="$2" label="$3"

  log_info "Checking storage status on ${label}..."
  printf '\n'
  if ! ssh_node "${node_host}" "${node_pass}" "pvesm status"; then
    log_error "Failed to get storage status from ${label}"
    return 1
  fi
  printf '\n'
}
# Show the LVM volume groups (`vgs`) of one node.
# Arguments: $1 - host address, $2 - root password, $3 - node name for logs
# Returns:   0 on success, 1 when the remote command fails
get_volume_groups() {
  local node_host="$1" node_pass="$2" label="$3"

  log_info "Checking volume groups on ${label}..."
  printf '\n'
  if ! ssh_node "${node_host}" "${node_pass}" "vgs"; then
    log_warn "Failed to get volume groups from ${label} (may not have LVM)"
    return 1
  fi
  printf '\n'
}
# Dump /etc/pve/storage.cfg from one node, or a placeholder line if the file
# cannot be read. Failures of the SSH call itself are ignored on purpose.
# Arguments: $1 - host address, $2 - root password, $3 - node name for logs
# Returns:   0
get_storage_config() {
  local node_host="$1" node_pass="$2" label="$3"
  local remote_cmd="cat /etc/pve/storage.cfg 2>/dev/null || echo 'No storage.cfg found'"

  log_info "Checking storage configuration on ${label}..."
  printf '\n'
  ssh_node "${node_host}" "${node_pass}" "${remote_cmd}" || :
  printf '\n'
}
# Check where a container is located
#
# Asks ml110, pve and pve2 in that order for the container's status and
# reports the first node that answers with "running" or "stopped".
# Arguments: $1 - container ID
# Globals:   PROXMOX_HOST_ML110/PVE/PVE2, ML110_PASS/PVE_PASS/PVE2_PASS (read)
# Outputs:   stdout - exactly one line: the node name, or "not_found"
#            stderr - progress log
# Returns:   0 when found, 1 otherwise
find_container_location() {
  local vmid=$1
  # Callers capture stdout as the node name, so the progress message must
  # not be written there.
  log_info "Finding location of container $vmid..." >&2

  local -a names=(ml110 pve pve2)
  local -a hosts=("$PROXMOX_HOST_ML110" "$PROXMOX_HOST_PVE" "$PROXMOX_HOST_PVE2")
  local -a passes=("$ML110_PASS" "$PVE_PASS" "$PVE2_PASS")
  local i state

  for i in "${!names[@]}"; do
    # pvesh prints a text table unless told otherwise; jq needs JSON.
    state=$(ssh_node "${hosts[i]}" "${passes[i]}" \
      "pvesh get /nodes/${names[i]}/lxc/$vmid/status/current --output-format json 2>/dev/null | jq -r '.status' 2>/dev/null") || state=""
    if [[ "$state" == *running* || "$state" == *stopped* ]]; then
      printf '%s\n' "${names[i]}"
      return 0
    fi
  done

  echo "not_found"
  return 1
}
# Get container storage info
#
# Reads the storage ID in front of the first ':' of the container's
# "rootfs:" config line.
# Arguments: $1 - container ID
#            $2 - node name (used in the log message only)
#            $3 - host address
#            $4 - root password
# Outputs:   stdout - exactly one line: the storage ID, or "unknown" when it
#                     could not be determined
#            stderr - progress log
# Returns:   0
get_container_storage() {
  local vmid=$1
  local node=$2
  local host=$3
  local pass=$4
  # Callers capture stdout as the storage name; keep the log off it.
  log_info "Getting storage info for container $vmid on $node..." >&2

  # Get rootfs storage
  local rootfs
  rootfs=$(ssh_node "$host" "$pass" "pct config $vmid 2>/dev/null | grep '^rootfs:' | awk '{print \$2}' | cut -d: -f1") || rootfs=""
  # The remote pipeline exits 0 even when nothing matched, so an empty
  # result has to be mapped to "unknown" explicitly.
  printf '%s\n' "${rootfs:-unknown}"
}
# Check if storage exists and is active on target node
#
# Arguments: $1 - target node name ("pve" or "pve2")
#            $2 - storage ID to look for
# Globals:   PROXMOX_HOST_PVE/PVE2, PVE_PASS/PVE2_PASS (read)
# Outputs:   log lines on stdout
# Returns:   0 when the storage is listed with status "active" (or
#            "enabled"); 1 when it is missing, has any other status, or the
#            node name is not recognised
check_target_storage() {
  local target_node=$1
  local storage_name=$2
  local host=""
  local pass=""
  case "$target_node" in
    pve)
      host="$PROXMOX_HOST_PVE"
      pass="$PVE_PASS"
      ;;
    pve2)
      host="$PROXMOX_HOST_PVE2"
      pass="$PVE2_PASS"
      ;;
    *)
      log_error "Unknown target node: $target_node"
      return 1
      ;;
  esac
  log_info "Checking if storage '$storage_name' exists and is active on $target_node..."

  # Fetch the table once and parse it locally: the name is compared exactly
  # (a prefix match would let "local" hit "local-lvm") and is never pasted
  # into the remote shell command.
  local table status
  table=$(ssh_node "$host" "$pass" "pvesm status 2>/dev/null") || table=""
  status=$(awk -v name="$storage_name" '$1 == name { print tolower($3); exit }' <<<"$table")

  if [[ -z "$status" ]]; then
    log_error "Storage '$storage_name' not found on $target_node"
    return 1
  fi
  # Whole-word comparison: a substring test would accept "inactive".
  if [[ "$status" == "active" || "$status" == "enabled" ]]; then
    log_success "Storage '$storage_name' is active on $target_node"
    return 0
  else
    log_warn "Storage '$storage_name' exists but is not active (status: $status)"
    return 1
  fi
}
# Fix storage configuration on target node
#
# Picks the storage that migrations to the target node should use: the
# preferred storage when it is listed as active, otherwise the first active
# storage whose name contains "thin1" or "local".
# Arguments: $1 - target node name ("pve" or "pve2")
#            $2 - preferred storage ID (optional)
# Globals:   PROXMOX_HOST_PVE/PVE2, PVE_PASS/PVE2_PASS (read)
# Outputs:   stdout - the chosen storage ID and nothing else
#            stderr - log lines and, on failure, the node's storage table
# Returns:   0 when a storage was chosen, 1 otherwise
fix_target_storage() {
  local target_node=$1
  local preferred_storage=${2:-}
  local host=""
  local pass=""
  case "$target_node" in
    pve)
      host="$PROXMOX_HOST_PVE"
      pass="$PVE_PASS"
      ;;
    pve2)
      host="$PROXMOX_HOST_PVE2"
      pass="$PVE2_PASS"
      ;;
    *)
      log_error "Unknown target node: $target_node" >&2
      return 1
      ;;
  esac
  # stdout is captured by the caller as the storage name, so every log line
  # in this function is sent to stderr.
  log_info "Attempting to fix storage configuration on $target_node..." >&2

  # Check available storage
  local table available_storage
  table=$(ssh_node "$host" "$pass" "pvesm status 2>/dev/null") || table=""
  available_storage=$(awk -v want="$preferred_storage" '
    tolower($3) != "active" { next }
    want != "" && $1 == want { chosen = $1; exit }
    fallback == "" && $1 ~ /thin1|local/ { fallback = $1 }
    END {
      if (chosen != "") print chosen
      else if (fallback != "") print fallback
    }
  ' <<<"$table")

  if [[ -z "$available_storage" ]]; then
    log_error "No suitable storage found on $target_node" >&2
    log_info "Available storage:" >&2
    ssh_node "$host" "$pass" "pvesm status" >&2 || true
    return 1
  fi
  log_success "Found available storage: $available_storage on $target_node" >&2
  printf '%s\n' "$available_storage"
}
# Diagnose all nodes
#
# For ml110, pve and pve2 in turn: test SSH connectivity and, when the node
# answers, show its storage status and LVM volume groups.
# Globals: PROXMOX_HOST_ML110/PVE/PVE2, ML110_PASS/PVE_PASS/PVE2_PASS (read)
# Returns: 0 - this phase is informational and never fails the script
diagnose_all_nodes() {
  log_header
  log_info "DIAGNOSTIC PHASE - Checking All Nodes"
  log_header
  echo ""

  local -a names=(ml110 pve pve2)
  local -a hosts=("$PROXMOX_HOST_ML110" "$PROXMOX_HOST_PVE" "$PROXMOX_HOST_PVE2")
  local -a passes=("$ML110_PASS" "$PVE_PASS" "$PVE2_PASS")
  local i

  for i in "${!names[@]}"; do
    if check_node_connectivity "${hosts[i]}" "${passes[i]}" "${names[i]}"; then
      # Both helpers log their own failure and return 1. Under `set -e` an
      # unguarded call would end the whole run (for instance on a node
      # without LVM), so the status is deliberately ignored here.
      get_storage_status "${hosts[i]}" "${passes[i]}" "${names[i]}" || true
      get_volume_groups "${hosts[i]}" "${passes[i]}" "${names[i]}" || true
    fi
  done
  return 0
}
# Diagnose container locations and storage
#
# For every ID in CONTAINERS, logs the node it was found on, the storage
# holding its rootfs and its current status.
# Globals: CONTAINERS, PROXMOX_HOST_ML110/PVE/PVE2,
#          ML110_PASS/PVE_PASS/PVE2_PASS (read)
# Returns: 0
diagnose_containers() {
  log_header
  log_info "CONTAINER DIAGNOSTIC PHASE"
  log_header
  echo ""

  local vmid lookup location host pass storage status
  for vmid in "${CONTAINERS[@]}"; do
    log_info "Container $vmid:"

    # Find location. The helper exits non-zero for "not_found" and may print
    # log text ahead of its answer, so tolerate the status and keep only the
    # last line of what it wrote.
    lookup=$(find_container_location "$vmid") || true
    location=${lookup##*$'\n'}
    log_info " Location: ${location:-not_found}"

    host=""
    pass=""
    case "$location" in
      ml110)
        host="$PROXMOX_HOST_ML110"
        pass="$ML110_PASS"
        ;;
      pve)
        host="$PROXMOX_HOST_PVE"
        pass="$PVE_PASS"
        ;;
      pve2)
        host="$PROXMOX_HOST_PVE2"
        pass="$PVE2_PASS"
        ;;
    esac

    if [[ -n "$host" ]]; then
      # Get storage (same last-line rule as for the location lookup)
      lookup=$(get_container_storage "$vmid" "$location" "$host" "$pass") || true
      storage=${lookup##*$'\n'}
      log_info " Current storage: ${storage:-unknown}"

      # Get status. pvesh prints a text table by default; jq needs JSON.
      status=$(ssh_node "$host" "$pass" "pvesh get /nodes/$location/lxc/$vmid/status/current --output-format json 2>/dev/null | jq -r '.status' 2>/dev/null") || status=""
      log_info " Status: ${status:-unknown}"
    else
      log_warn " Container $vmid not found on any node"
    fi
    echo ""
  done
}
# Fix storage and attempt migration
#
# Chooses a storage on TARGET_NODE, then for each ID in CONTAINERS that is
# not already there: stops it if running, requests an offline migration
# (first with the chosen storage, then without), and polls up to 12 times at
# 5 second intervals until the container shows up on the target node.
# Globals: CONTAINERS, TARGET_NODE, PROXMOX_HOST_ML110/PVE/PVE2,
#          ML110_PASS/PVE_PASS/PVE2_PASS (read)
# Returns: 0 when no container failed, 1 otherwise (including when no target
#          storage could be determined)
fix_and_migrate() {
  log_header
  log_info "FIX AND MIGRATION PHASE"
  log_header
  echo ""

  # Determine target storage. Helpers called through $(...) may print log
  # text ahead of their answer; only the last line is the result.
  log_info "Determining target storage on $TARGET_NODE..."
  local lookup target_storage
  lookup=$(fix_target_storage "$TARGET_NODE" "thin1") || lookup=""
  target_storage=${lookup##*$'\n'}
  if [[ -z "$target_storage" ]]; then
    log_error "Cannot determine target storage. Aborting migration."
    return 1
  fi
  log_success "Using target storage: $target_storage on $TARGET_NODE"
  echo ""

  # Migrate each container
  local failed=0
  local vmid location source_host source_pass status
  local migrate_output migrate_exit migrated attempt new_location
  for vmid in "${CONTAINERS[@]}"; do
    log_info "Processing container $vmid..."

    # Find current location (non-zero exit means "not_found")
    lookup=$(find_container_location "$vmid") || true
    location=${lookup##*$'\n'}
    if [[ -z "$location" || "$location" == "not_found" ]]; then
      log_warn "Container $vmid not found, skipping..."
      continue
    fi
    if [[ "$location" == "$TARGET_NODE" ]]; then
      log_success "Container $vmid is already on $TARGET_NODE"
      continue
    fi

    # Get source host and pass
    source_host=""
    source_pass=""
    case "$location" in
      ml110)
        source_host="$PROXMOX_HOST_ML110"
        source_pass="$ML110_PASS"
        ;;
      pve)
        source_host="$PROXMOX_HOST_PVE"
        source_pass="$PVE_PASS"
        ;;
      pve2)
        source_host="$PROXMOX_HOST_PVE2"
        source_pass="$PVE2_PASS"
        ;;
      *)
        log_error " Unknown source node '$location' for container $vmid"
        failed=$((failed + 1))
        echo ""
        continue
        ;;
    esac

    # Stop container if running. pvesh prints a text table by default, so
    # JSON output is requested explicitly for jq.
    log_info " Checking container status..."
    status=$(ssh_node "$source_host" "$source_pass" "pvesh get /nodes/$location/lxc/$vmid/status/current --output-format json 2>/dev/null | jq -r '.status' 2>/dev/null") || status="stopped"
    if [[ "$status" == "running" ]]; then
      log_info " Stopping container $vmid..."
      ssh_node "$source_host" "$source_pass" "pct stop $vmid" || {
        log_warn " Failed to stop container, trying shutdown..."
        ssh_node "$source_host" "$source_pass" "pvesh create /nodes/$location/lxc/$vmid/status/shutdown --timeout 30" || true
      }
      sleep 5
    fi

    # Attempt migration
    log_info " Migrating container $vmid from $location to $TARGET_NODE..."
    log_info " Target storage: $target_storage"

    # Try migration with storage specification first. The exit status is
    # taken from the assignment itself: reading $? after `local var=$(...)`
    # would report the status of `local`, which is always 0.
    # NOTE(review): the LXC migrate call takes the destination storage as
    # "target-storage" (see `pct migrate`); confirm against the PVE version
    # in use. If it is rejected, the storage-less retry below still applies.
    migrate_exit=0
    migrate_output=$(ssh_node "$source_host" "$source_pass" \
      "pvesh create /nodes/$location/lxc/$vmid/migrate --target $TARGET_NODE --target-storage $target_storage --online 0" 2>&1) || migrate_exit=$?
    if ((migrate_exit != 0)); then
      log_warn " Migration with storage specification failed, trying without storage..."
      # Try without storage (Proxmox will use default)
      migrate_exit=0
      migrate_output=$(ssh_node "$source_host" "$source_pass" \
        "pvesh create /nodes/$location/lxc/$vmid/migrate --target $TARGET_NODE --online 0" 2>&1) || migrate_exit=$?
    fi

    if ((migrate_exit == 0)); then
      log_success " Migration command completed for container $vmid"
      # Wait and verify
      log_info " Waiting for migration to complete..."
      migrated=false
      for attempt in {1..12}; do
        sleep 5
        lookup=$(find_container_location "$vmid") || true
        new_location=${lookup##*$'\n'}
        if [[ "$new_location" == "$TARGET_NODE" ]]; then
          log_success " Container $vmid is now on $TARGET_NODE"
          migrated=true
          break
        fi
        if ((attempt < 12)); then
          log_info " Still migrating... (attempt $attempt/12)"
        fi
      done
      if [[ "$migrated" == "false" ]]; then
        log_warn " Migration may still be in progress or failed"
        log_info " Please verify manually: ssh root@$source_host 'pvesh get /nodes/$TARGET_NODE/lxc'"
        failed=$((failed + 1))
      fi
    else
      log_error " Migration failed for container $vmid"
      log_info " Error output: $migrate_output"
      failed=$((failed + 1))
    fi
    echo ""
  done

  if ((failed == 0)); then
    log_success "All containers migrated successfully!"
    return 0
  else
    log_warn "$failed container(s) failed to migrate"
    return 1
  fi
}
# Main execution
#
# Prints the plan, asks for confirmation unless NON_INTERACTIVE=1 or stdin is
# not a terminal, runs the diagnostic phases and the migration phase, then
# reports where each container ended up.
# Globals: CONTAINERS, TARGET_NODE, NON_INTERACTIVE (read)
main() {
  echo ""
  log_header
  log_info "Proxmox Storage Migration Diagnostic and Fix Tool"
  log_header
  echo ""
  log_info "This script will:"
  log_info " 1. Diagnose storage configuration on all nodes"
  log_info " 2. Check container locations and storage"
  log_info " 3. Fix storage issues if needed"
  log_info " 4. Attempt to migrate containers: ${CONTAINERS[*]}"
  log_info " 5. Target node: $TARGET_NODE"
  echo ""

  # Check for non-interactive mode
  if [[ "${NON_INTERACTIVE:-}" == "1" ]] || [[ ! -t 0 ]]; then
    log_info "Non-interactive mode: proceeding automatically"
  else
    # A failed read (e.g. end of input) leaves REPLY empty, which is
    # handled as "no" below rather than tripping `set -e`/`set -u`.
    REPLY=""
    read -p "Continue? (y/N): " -n 1 -r || true
    echo ""
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
      log_info "Operation cancelled"
      exit 0
    fi
  fi
  echo ""

  # Phase 1: Diagnose
  diagnose_all_nodes
  echo ""
  diagnose_containers
  echo ""

  # Phase 2: Fix and migrate
  if fix_and_migrate; then
    log_success "Migration process completed successfully!"
  else
    log_warn "Migration process completed with some failures"
    log_info "Review the output above for details"
  fi
  echo ""

  log_header
  log_info "Final Container Locations"
  log_header
  echo ""
  local vmid lookup location
  for vmid in "${CONTAINERS[@]}"; do
    # The lookup exits non-zero for "not_found" and may print log text ahead
    # of its answer; tolerate the status and keep only the last line.
    lookup=$(find_container_location "$vmid") || true
    location=${lookup##*$'\n'}
    if [[ -n "$location" && "$location" != "not_found" ]]; then
      log_success "Container $vmid: $location"
    else
      log_warn "Container $vmid: not found"
    fi
  done
  echo ""
}
# Entry point: run main, passing the script's command-line arguments through.
main "$@"