- Organized 252 files across project - Root directory: 187 → 2 files (98.9% reduction) - Moved configuration guides to docs/04-configuration/ - Moved troubleshooting guides to docs/09-troubleshooting/ - Moved quick start guides to docs/01-getting-started/ - Moved reports to reports/ directory - Archived temporary files - Generated comprehensive reports and documentation - Created maintenance scripts and guides All files organized according to established standards.
488 lines
14 KiB
Bash
Executable File
488 lines
14 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Diagnose and Fix Storage Issues for Proxmox Container Migrations
#
# This script checks storage configuration and fixes issues to enable
# migrations.
#
# Every scalar setting below can be overridden from the environment; the
# values after ":-" are the defaults used when the variable is unset or empty.

set -euo pipefail

# Configuration
PROXMOX_HOST_ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
PROXMOX_HOST_PVE="${PROXMOX_HOST_PVE:-192.168.11.11}"
PROXMOX_HOST_PVE2="${PROXMOX_HOST_PVE2:-192.168.11.12}"

# SECURITY NOTE(review): the fallback root passwords below are committed in
# clear text. Supply ML110_PASS / PVE_PASS / PVE2_PASS through the environment
# (or switch to SSH keys), then rotate and remove these defaults.
ML110_PASS="${ML110_PASS:-L@kers2010}"
PVE_PASS="${PVE_PASS:-password}"
PVE2_PASS="${PVE2_PASS:-password}"

# Containers to migrate
CONTAINERS=(1504 2503 2504 6201)
TARGET_NODE="${TARGET_NODE:-pve}"

# Nothing later in the script reassigns these, so lock them down.
readonly PROXMOX_HOST_ML110 PROXMOX_HOST_PVE PROXMOX_HOST_PVE2
readonly ML110_PASS PVE_PASS PVE2_PASS
readonly TARGET_NODE
readonly -a CONTAINERS
|
|
|
|
# Colors
# ANSI colour sequences kept as literal backslash text ('\033[...'); they are
# turned into real escape characters only when printed by the log helpers.
# Marked readonly because they are constants.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m' # reset / "no colour"
|
|
|
|
# Logging helpers.
#
# Each helper prints one line to stdout: a coloured tag followed by the
# message given as $1 (an omitted message prints as empty).
#
# The colour variables are expanded through %b so their '\033' sequences are
# rendered, while the message is printed through %s so any backslashes it
# contains (for example in captured remote command output) appear verbatim
# instead of being interpreted as escape sequences.
log_info() { printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "${1:-}"; }
log_success() { printf '%b[✓]%b %s\n' "${GREEN}" "${NC}" "${1:-}"; }
log_warn() { printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "${1:-}"; }
log_error() { printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1:-}"; }

# Prints a coloured horizontal rule used to frame section titles.
log_header() { printf '%b%s%b\n' "${CYAN}" "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" "${NC}"; }
|
|
|
|
# SSH helper with password
#
# Runs a command as root on a remote node using password authentication.
# Arguments:
#   $1   - host name or address of the node
#   $2   - root password for that node
#   $3.. - command (and arguments) to run remotely
# Outputs: the remote command's stdout and stderr, merged onto stdout
# Returns: the exit status of the sshpass/ssh invocation
ssh_node() {
  local host=$1
  local pass=$2
  shift 2

  # Hand the password to sshpass through the SSHPASS environment variable
  # (-e) instead of `-p`, so it is not exposed in the process argument list.
  SSHPASS="$pass" sshpass -e ssh \
    -o ConnectTimeout=5 \
    -o StrictHostKeyChecking=accept-new \
    "root@${host}" "$@" 2>&1
}
|
|
|
|
# Check if node is reachable
#
# Probes a node by running a trivial command on it through ssh_node, so the
# SSH options live in one place instead of being duplicated here.
# Arguments:
#   $1 - host name or address
#   $2 - root password
#   $3 - human-readable node name used in log messages
# Outputs: progress and result messages via the log helpers
# Returns: 0 when the remote command succeeded, 1 otherwise
check_node_connectivity() {
  local host=$1
  local pass=$2
  local node_name=$3

  log_info "Checking connectivity to $node_name ($host)..."

  # Only the exit status matters; the probe's own output (and any SSH
  # diagnostics) are discarded so they do not clutter the report.
  if ssh_node "$host" "$pass" "echo 'connected'" >/dev/null 2>&1; then
    log_success "$node_name is reachable"
    return 0
  fi

  log_error "$node_name is not reachable"
  return 1
}
|
|
|
|
# Get storage status on a node
#
# Prints the output of `pvesm status` from the given node, framed by blank
# lines.
# Arguments:
#   $1 - host name or address
#   $2 - root password
#   $3 - node name used in log messages
# Returns: 0 on success, 1 when the remote command fails
get_storage_status() {
  local node_addr=$1
  local node_secret=$2
  local label=$3

  log_info "Checking storage status on $label..."
  printf '\n'

  if ! ssh_node "$node_addr" "$node_secret" "pvesm status"; then
    log_error "Failed to get storage status from $label"
    return 1
  fi

  printf '\n'
}
|
|
|
|
# Get volume groups on a node
#
# Prints the output of `vgs` from the given node, framed by blank lines.
# A failing `vgs` is reported as a warning only: it is treated as a
# non-fatal condition, so the function still returns 0. Returning non-zero
# here would abort the whole script under `set -e` when the function is
# called as a plain statement.
# Arguments:
#   $1 - host name or address
#   $2 - root password
#   $3 - node name used in log messages
# Returns: 0 always
get_volume_groups() {
  local host=$1
  local pass=$2
  local node_name=$3

  log_info "Checking volume groups on $node_name..."
  echo ""

  if ! ssh_node "$host" "$pass" "vgs"; then
    log_warn "Failed to get volume groups from $node_name (may not have LVM)"
  fi

  echo ""
  return 0
}
|
|
|
|
# Get storage configuration
#
# Prints /etc/pve/storage.cfg from the given node (or a placeholder line when
# the file cannot be read), framed by blank lines. Failures of the remote call
# are deliberately ignored.
# Arguments:
#   $1 - host name or address
#   $2 - root password
#   $3 - node name used in log messages
get_storage_config() {
  local node_addr=$1
  local node_secret=$2
  local label=$3
  local remote_cmd="cat /etc/pve/storage.cfg 2>/dev/null || echo 'No storage.cfg found'"

  log_info "Checking storage configuration on $label..."
  printf '\n'
  ssh_node "$node_addr" "$node_secret" "$remote_cmd" || true
  printf '\n'
}
|
|
|
|
# Check where a container is located
#
# Asks each known node (ml110, pve, pve2 - in that order) for the status of
# the container and reports the first node that answers "running" or
# "stopped".
# Globals:   PROXMOX_HOST_ML110, PROXMOX_HOST_PVE, PROXMOX_HOST_PVE2,
#            ML110_PASS, PVE_PASS, PVE2_PASS (read)
# Arguments: $1 - container VMID
# Outputs:   stdout - exactly one line: the node name, or "not_found"
#            stderr - progress message
# Returns:   0 when the container was found, 1 otherwise
find_container_location() {
  local vmid=$1
  local -a node_names=(ml110 pve pve2)
  local -a node_hosts=("$PROXMOX_HOST_ML110" "$PROXMOX_HOST_PVE" "$PROXMOX_HOST_PVE2")
  local -a node_passes=("$ML110_PASS" "$PVE_PASS" "$PVE2_PASS")
  local idx state

  # Callers capture stdout to obtain the node name, so the progress message
  # must go to stderr or it would become part of the captured value.
  log_info "Finding location of container $vmid..." >&2

  for idx in "${!node_names[@]}"; do
    # pvesh prints human-readable text unless JSON is requested, and jq can
    # only parse JSON - hence the explicit --output-format.
    state=$(ssh_node "${node_hosts[idx]}" "${node_passes[idx]}" \
      "pvesh get /nodes/${node_names[idx]}/lxc/$vmid/status/current --output-format json 2>/dev/null | jq -r '.status' 2>/dev/null") || state=""

    # Match whole lines only, so unrelated text never counts as a status.
    if grep -Eqx 'running|stopped' <<<"$state"; then
      printf '%s\n' "${node_names[idx]}"
      return 0
    fi
  done

  printf '%s\n' "not_found"
  return 1
}
|
|
|
|
# Get container storage info
#
# Reads the container's configuration on the given node and extracts the
# storage name from its "rootfs:" line (the part before the first colon).
# Arguments:
#   $1 - container VMID
#   $2 - node name (used in the log message only)
#   $3 - host name or address of that node
#   $4 - root password of that node
# Outputs:
#   stdout - exactly one line: the storage name, or "unknown" when it could
#            not be determined
#   stderr - progress message
get_container_storage() {
  local vmid=$1
  local node=$2
  local host=$3
  local pass=$4
  local rootfs

  # stdout is the return channel for callers using $(...); log to stderr.
  log_info "Getting storage info for container $vmid on $node..." >&2

  # Get rootfs storage
  rootfs=$(ssh_node "$host" "$pass" \
    "pct config $vmid 2>/dev/null | grep '^rootfs:' | awk '{print \$2}' | cut -d: -f1") || rootfs=""

  # Covers both a failed SSH call and a config without a rootfs line.
  printf '%s\n' "${rootfs:-unknown}"
}
|
|
|
|
# Check if storage exists and is active on target node
#
# Looks up one storage by exact name in the target node's `pvesm status`
# listing and inspects its status column.
# Globals:   PROXMOX_HOST_PVE, PVE_PASS, PROXMOX_HOST_PVE2, PVE2_PASS (read)
# Arguments:
#   $1 - target node name; only "pve" and "pve2" are accepted
#   $2 - storage name to look for
# Outputs:   progress and result messages via the log helpers
# Returns:   0 when the storage is listed with status "active" (or
#            "enabled"); 1 when the node is unknown, the storage is not
#            listed, or it has any other status
check_target_storage() {
  local target_node=$1
  local storage_name=$2
  local host=""
  local pass=""
  local status

  case "$target_node" in
    pve)
      host="$PROXMOX_HOST_PVE"
      pass="$PVE_PASS"
      ;;
    pve2)
      host="$PROXMOX_HOST_PVE2"
      pass="$PVE2_PASS"
      ;;
    *)
      log_error "Unknown target node: $target_node"
      return 1
      ;;
  esac

  log_info "Checking if storage '$storage_name' exists and is active on $target_node..."

  # Compare the name column for equality: a prefix match would make "local"
  # also select "local-lvm" and yield more than one status value.
  status=$(ssh_node "$host" "$pass" \
    "pvesm status 2>/dev/null | awk -v name='$storage_name' '\$1 == name {print \$3; exit}'") || status=""

  if [[ -z "$status" ]]; then
    log_error "Storage '$storage_name' not found on $target_node"
    return 1
  fi

  # Whole-line match: a substring test would accept "inactive" as "active".
  if grep -Eqix 'active|enabled' <<<"$status"; then
    log_success "Storage '$storage_name' is active on $target_node"
    return 0
  fi

  log_warn "Storage '$storage_name' exists but is not active (status: $status)"
  return 1
}
|
|
|
|
# Fix storage configuration on target node
#
# Selects a storage on the target node to migrate containers onto. No
# configuration is modified; the function only picks a name.
#
# Selection order: the caller's preferred storage first, then thin1,
# local-lvm and local. Only storages that `pvesm status` lists as "active"
# are considered, and names are compared for equality.
# Globals:   PROXMOX_HOST_PVE, PVE_PASS, PROXMOX_HOST_PVE2, PVE2_PASS (read)
# Arguments:
#   $1 - target node name; only "pve" and "pve2" are accepted
#   $2 - preferred storage name (optional)
# Outputs:
#   stdout - exactly one line with the chosen storage name (nothing on
#            failure), so callers can capture it with $(...)
#   stderr - all log messages and the diagnostic storage listing
# Returns:   0 when a storage was chosen, 1 otherwise
fix_target_storage() {
  local target_node=$1
  local preferred_storage=${2:-}
  local host=""
  local pass=""
  local active_storages candidate
  local available_storage=""

  case "$target_node" in
    pve)
      host="$PROXMOX_HOST_PVE"
      pass="$PVE_PASS"
      ;;
    pve2)
      host="$PROXMOX_HOST_PVE2"
      pass="$PVE2_PASS"
      ;;
    *)
      log_error "Unknown target node: $target_node" >&2
      return 1
      ;;
  esac

  log_info "Attempting to fix storage configuration on $target_node..." >&2

  # Check available storage: one active storage name per line.
  active_storages=$(ssh_node "$host" "$pass" \
    "pvesm status 2>/dev/null | awk '\$3 == \"active\" {print \$1}'") || active_storages=""

  for candidate in "$preferred_storage" thin1 local-lvm local; do
    [[ -n "$candidate" ]] || continue
    if grep -Fqx -- "$candidate" <<<"$active_storages"; then
      available_storage=$candidate
      break
    fi
  done

  if [[ -z "$available_storage" ]]; then
    log_error "No suitable storage found on $target_node" >&2
    log_info "Available storage:" >&2
    ssh_node "$host" "$pass" "pvesm status" >&2 || true
    return 1
  fi

  log_success "Found available storage: $available_storage on $target_node" >&2
  printf '%s\n' "$available_storage"
}
|
|
|
|
# Diagnose all nodes
#
# For each known node (ml110, pve, pve2) checks connectivity and, when the
# node is reachable, prints its storage status and volume groups.
#
# The per-node reports are best effort: a failing helper has already logged
# its own message, and its non-zero status is ignored so that `set -e` does
# not abort the run in the middle of the diagnostic phase.
# Globals:   PROXMOX_HOST_ML110, PROXMOX_HOST_PVE, PROXMOX_HOST_PVE2,
#            ML110_PASS, PVE_PASS, PVE2_PASS (read)
# Returns:   0 always
diagnose_all_nodes() {
  local -a node_names=(ml110 pve pve2)
  local -a node_hosts=("$PROXMOX_HOST_ML110" "$PROXMOX_HOST_PVE" "$PROXMOX_HOST_PVE2")
  local -a node_passes=("$ML110_PASS" "$PVE_PASS" "$PVE2_PASS")
  local idx

  log_header
  log_info "DIAGNOSTIC PHASE - Checking All Nodes"
  log_header
  echo ""

  for idx in "${!node_names[@]}"; do
    if check_node_connectivity "${node_hosts[idx]}" "${node_passes[idx]}" "${node_names[idx]}"; then
      get_storage_status "${node_hosts[idx]}" "${node_passes[idx]}" "${node_names[idx]}" || true
      get_volume_groups "${node_hosts[idx]}" "${node_passes[idx]}" "${node_names[idx]}" || true
    fi
  done

  return 0
}
|
|
|
|
# Diagnose container locations and storage
#
# For every VMID in CONTAINERS reports the node it was found on, the storage
# holding its root filesystem and its current status.
# Globals:   CONTAINERS, PROXMOX_HOST_ML110, PROXMOX_HOST_PVE,
#            PROXMOX_HOST_PVE2, ML110_PASS, PVE_PASS, PVE2_PASS (read)
# Outputs:   report lines via the log helpers
diagnose_containers() {
  local vmid location host pass storage status

  log_header
  log_info "CONTAINER DIAGNOSTIC PHASE"
  log_header
  echo ""

  for vmid in "${CONTAINERS[@]}"; do
    log_info "Container $vmid:"

    # Find location. The helper returns non-zero for "not_found"; that is a
    # normal outcome here, so it must not trip `set -e`.
    location=$(find_container_location "$vmid") || true
    log_info " Location: $location"

    if [[ "$location" != "not_found" ]]; then
      host=""
      pass=""

      case "$location" in
        ml110)
          host="$PROXMOX_HOST_ML110"
          pass="$ML110_PASS"
          ;;
        pve)
          host="$PROXMOX_HOST_PVE"
          pass="$PVE_PASS"
          ;;
        pve2)
          host="$PROXMOX_HOST_PVE2"
          pass="$PVE2_PASS"
          ;;
        *)
          # Without credentials there is nothing sensible to query.
          log_warn " Container $vmid reported on unknown node '$location'"
          echo ""
          continue
          ;;
      esac

      # Get storage
      storage=$(get_container_storage "$vmid" "$location" "$host" "$pass") || storage="unknown"
      log_info " Current storage: $storage"

      # Get status. JSON output is requested explicitly because jq cannot
      # parse pvesh's default text format.
      status=$(ssh_node "$host" "$pass" \
        "pvesh get /nodes/$location/lxc/$vmid/status/current --output-format json 2>/dev/null | jq -r '.status' 2>/dev/null") || status=""
      log_info " Status: ${status:-unknown}"
    else
      log_warn " Container $vmid not found on any node"
    fi

    echo ""
  done
}
|
|
|
|
# Fix storage and attempt migration
#
# Chooses a target storage on TARGET_NODE, then for every VMID in CONTAINERS:
# locates the container, stops it if it is running, requests an offline
# migration to TARGET_NODE (first naming the target storage, then - if that
# request fails - without naming one), and polls until the container shows
# up on TARGET_NODE.
# Globals:   CONTAINERS, TARGET_NODE, PROXMOX_HOST_ML110, PROXMOX_HOST_PVE,
#            PROXMOX_HOST_PVE2, ML110_PASS, PVE_PASS, PVE2_PASS (read)
# Outputs:   progress messages via the log helpers
# Returns:   0 when no container failed; 1 when the target storage could not
#            be determined or at least one container failed or could not be
#            confirmed on TARGET_NODE
fix_and_migrate() {
  local target_storage location source_host source_pass status
  local migrate_output migrate_exit migrated new_location
  local vmid attempt
  local failed=0
  local -r max_attempts=12

  log_header
  log_info "FIX AND MIGRATION PHASE"
  log_header
  echo ""

  # Determine target storage
  log_info "Determining target storage on $TARGET_NODE..."
  target_storage=$(fix_target_storage "$TARGET_NODE" "thin1") || target_storage=""

  if [[ -z "$target_storage" ]]; then
    log_error "Cannot determine target storage. Aborting migration."
    return 1
  fi

  log_success "Using target storage: $target_storage on $TARGET_NODE"
  echo ""

  # Migrate each container
  for vmid in "${CONTAINERS[@]}"; do
    log_info "Processing container $vmid..."

    # Find current location ("not_found" comes with a non-zero status, which
    # must not trip `set -e`).
    location=$(find_container_location "$vmid") || true

    if [[ "$location" == "not_found" ]]; then
      log_warn "Container $vmid not found, skipping..."
      continue
    fi

    if [[ "$location" == "$TARGET_NODE" ]]; then
      log_success "Container $vmid is already on $TARGET_NODE"
      continue
    fi

    # Get source host and pass
    source_host=""
    source_pass=""

    case "$location" in
      ml110)
        source_host="$PROXMOX_HOST_ML110"
        source_pass="$ML110_PASS"
        ;;
      pve)
        source_host="$PROXMOX_HOST_PVE"
        source_pass="$PVE_PASS"
        ;;
      pve2)
        source_host="$PROXMOX_HOST_PVE2"
        source_pass="$PVE2_PASS"
        ;;
      *)
        # No credentials for this node, so the container cannot be migrated.
        log_error " Migration failed for container $vmid"
        log_info " Error output: unknown source node '$location'"
        failed=$((failed + 1))
        echo ""
        continue
        ;;
    esac

    # Stop container if running. JSON output is requested explicitly because
    # jq cannot parse pvesh's default text format.
    log_info " Checking container status..."
    status=$(ssh_node "$source_host" "$source_pass" \
      "pvesh get /nodes/$location/lxc/$vmid/status/current --output-format json 2>/dev/null | jq -r '.status' 2>/dev/null") || status="stopped"

    if [[ "$status" == "running" ]]; then
      log_info " Stopping container $vmid..."
      ssh_node "$source_host" "$source_pass" "pct stop $vmid" || {
        log_warn " Failed to stop container, trying shutdown..."
        ssh_node "$source_host" "$source_pass" "pvesh create /nodes/$location/lxc/$vmid/status/shutdown --timeout 30" || true
      }
      sleep 5
    fi

    # Attempt migration
    log_info " Migrating container $vmid from $location to $TARGET_NODE..."
    log_info " Target storage: $target_storage"

    # Try migration with storage specification first.
    # The exit status is captured with `|| migrate_exit=$?`: reading $? after
    # a `local var=$(...)` declaration would report the status of `local`
    # (always 0) and hide every failure.
    # NOTE(review): the LXC migrate endpoint appears to name this parameter
    # `target-storage`; confirm that `--storage` is accepted by the installed
    # Proxmox VE version.
    migrate_exit=0
    migrate_output=$(ssh_node "$source_host" "$source_pass" \
      "pvesh create /nodes/$location/lxc/$vmid/migrate --target $TARGET_NODE --storage $target_storage --online 0" 2>&1) || migrate_exit=$?

    if ((migrate_exit != 0)); then
      log_warn " Migration with storage specification failed, trying without storage..."
      # Try without storage (Proxmox will use default)
      migrate_exit=0
      migrate_output=$(ssh_node "$source_host" "$source_pass" \
        "pvesh create /nodes/$location/lxc/$vmid/migrate --target $TARGET_NODE --online 0" 2>&1) || migrate_exit=$?
    fi

    if ((migrate_exit == 0)); then
      log_success " Migration command completed for container $vmid"

      # Wait and verify
      log_info " Waiting for migration to complete..."
      migrated=false
      for ((attempt = 1; attempt <= max_attempts; attempt++)); do
        sleep 5
        new_location=$(find_container_location "$vmid") || true
        if [[ "$new_location" == "$TARGET_NODE" ]]; then
          log_success " Container $vmid is now on $TARGET_NODE"
          migrated=true
          break
        fi
        if ((attempt < max_attempts)); then
          log_info " Still migrating... (attempt $attempt/$max_attempts)"
        fi
      done

      if [[ "$migrated" == "false" ]]; then
        log_warn " Migration may still be in progress or failed"
        log_info " Please verify manually: ssh root@$source_host 'pvesh get /nodes/$TARGET_NODE/lxc'"
        failed=$((failed + 1))
      fi
    else
      log_error " Migration failed for container $vmid"
      log_info " Error output: $migrate_output"
      failed=$((failed + 1))
    fi

    echo ""
  done

  if ((failed == 0)); then
    log_success "All containers migrated successfully!"
    return 0
  fi

  log_warn "$failed container(s) failed to migrate"
  return 1
}
|
|
|
|
# Main execution
#
# Prints the plan, asks for confirmation (skipped when NON_INTERACTIVE=1 or
# stdin is not a terminal), runs the diagnostic phases, attempts the
# migrations and finally lists where each container ended up.
# Globals:   CONTAINERS, TARGET_NODE, NON_INTERACTIVE (read)
# Returns:   0 when the migration phase succeeded, 1 when it reported
#            failures, so callers and automation can detect a partial run.
#            Declining the confirmation prompt exits the script with 0.
main() {
  local vmid location
  local rc=0

  echo ""
  log_header
  log_info "Proxmox Storage Migration Diagnostic and Fix Tool"
  log_header
  echo ""

  log_info "This script will:"
  log_info " 1. Diagnose storage configuration on all nodes"
  log_info " 2. Check container locations and storage"
  log_info " 3. Fix storage issues if needed"
  log_info " 4. Attempt to migrate containers: ${CONTAINERS[*]}"
  log_info " 5. Target node: $TARGET_NODE"
  echo ""

  # Check for non-interactive mode
  if [[ "${NON_INTERACTIVE:-}" == "1" ]] || [[ ! -t 0 ]]; then
    log_info "Non-interactive mode: proceeding automatically"
  else
    # A failed read (e.g. end of input) is treated like an empty answer,
    # which falls through to "cancelled" below.
    read -p "Continue? (y/N): " -n 1 -r || true
    echo ""
    if [[ ! ${REPLY:-} =~ ^[Yy]$ ]]; then
      log_info "Operation cancelled"
      exit 0
    fi
  fi

  echo ""

  # Phase 1: Diagnose
  diagnose_all_nodes
  echo ""
  diagnose_containers
  echo ""

  # Phase 2: Fix and migrate
  if fix_and_migrate; then
    log_success "Migration process completed successfully!"
  else
    log_warn "Migration process completed with some failures"
    log_info "Review the output above for details"
    rc=1
  fi

  echo ""
  log_header
  log_info "Final Container Locations"
  log_header
  echo ""

  for vmid in "${CONTAINERS[@]}"; do
    # "not_found" is reported with a non-zero status; keep going regardless.
    location=$(find_container_location "$vmid") || true
    if [[ "$location" != "not_found" ]]; then
      log_success "Container $vmid: $location"
    else
      log_warn "Container $vmid: not found"
    fi
  done

  echo ""
  return "$rc"
}
|
|
|
|
# Entry point: run main with the script's command-line arguments.
main "$@"
|
|
|