Files
proxmox/scripts/monitor-rpc-migration.sh

215 lines
9.2 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Monitor RPC migration and automatically unlock/fix containers as they complete
#
# Usage: monitor-rpc-migration.sh [PROXMOX_HOST] [CHECK_INTERVAL] [MAX_ITERATIONS]
#   PROXMOX_HOST    host contacted over SSH as root (default: 192.168.11.10)
#   CHECK_INTERVAL  delay between checks, handed to sleep (default: 60)
#   MAX_ITERATIONS  number of checks before giving up (default: 120)
set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Best effort by design: the file is optional, so a missing or unreadable
# config is ignored and every consumer must cope with unset variables.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

PROXMOX_HOST="${1:-192.168.11.10}"
CHECK_INTERVAL="${2:-60}" # Check every 60 seconds
MAX_ITERATIONS="${3:-120}" # Run for up to 120 iterations (2 hours)

# MAX_ITERATIONS feeds an integer comparison; a non-numeric value would make
# that test error out, skip the loop and report a misleading timeout.
if ! [[ "$MAX_ITERATIONS" =~ ^[0-9]+$ ]]; then
  printf 'MAX_ITERATIONS must be a non-negative integer, got: %s\n' "$MAX_ITERATIONS" >&2
  exit 2
fi
# ANSI color codes, stored as literal backslash sequences and expanded only
# when a message is printed.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Logging helpers: each writes its first argument to stdout behind a colored
# tag. Backslash escapes in the message are interpreted (%b), exactly as
# `echo -e` would.
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

log_success() {
  printf '%b\n' "${GREEN}[✓]${NC} $1"
}

log_warn() {
  printf '%b\n' "${YELLOW}[⚠]${NC} $1"
}

log_error() {
  printf '%b\n' "${RED}[✗]${NC} $1"
}
# Target VMIDs to monitor
TARGET_VMIDS=(2101 2201 2301 2303 2304 2305 2306 2307 2308 2403)

# VMID -> IP address used when the container's network config is rewritten.
# Values come from the optional IP configuration file. Every expansion carries
# a default so that `set -u` does not abort the script when that file (or a
# single variable in it) is missing; an empty value means "no address known".
declare -A NETWORK_CONFIGS=(
  ["2101"]="${RPC_CORE_1:-}"
  ["2201"]="${RPC_PUBLIC_1:-}"
  ["2301"]="${RPC_PRIVATE_1:-}"
  ["2303"]="${RPC_NODE_233:-192.168.11.233}"
  ["2304"]="${RPC_NODE_234:-192.168.11.234}"
  ["2305"]="${RPC_NODE_235:-192.168.11.235}"
  ["2306"]="${RPC_NODE_236:-192.168.11.236}"
  ["2307"]="${IP_RPC_237:-192.168.11.237}"
  ["2308"]="${IP_RPC_238:-192.168.11.238}"
  ["2403"]="${RPC_THIRDWEB_3:-192.168.11.243}"
)
#######################################
# Fix a container: unlock, add rootfs if needed, update network.
# Globals:
#   PROXMOX_HOST     (read) host contacted over SSH as root
#   NETWORK_CONFIGS  (read) VMID -> IP map; a missing or empty entry skips
#                    the network step
#   NETWORK_GATEWAY  (read, optional) gateway, defaults to 192.168.11.1
# Arguments:
#   $1 - VMID of the container
# Outputs:
#   Progress via log_info / log_warn / log_success; remote command output
# Returns:
#   0 once every applicable step ran (a failed network update only warns),
#   1 if the container is missing, still being cloned, its state could not
#   be determined, or the unlock / rootfs step failed.
#######################################
fix_container() {
  local vmid=$1
  # `:-` keeps `set -u` from aborting on a VMID that has no mapping.
  local ip=${NETWORK_CONFIGS[$vmid]:-}
  local size="200G"
  local lock_status clone_running has_rootfs lv_exists
  local -a remote=(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${PROXMOX_HOST}")

  log_info "Fixing container $vmid..."

  # Check if container exists
  if ! "${remote[@]}" "pct list | grep -q '^$vmid '" 2>/dev/null; then
    log_warn " VMID $vmid does not exist yet"
    return 1
  fi

  # Check if locked
  lock_status=$("${remote[@]}" \
    "pct config $vmid 2>/dev/null | grep '^lock:' | awk '{print \$2}'") || lock_status=""

  if [[ "$lock_status" == "create" ]]; then
    # Check if clone process is still running. `grep -v grep` also filters
    # the remote shell that carries this very pattern on its command line.
    clone_running=$("${remote[@]}" \
      "ps aux | grep 'pct clone.*$vmid' | grep -v grep | wc -l") || clone_running=""
    clone_running=${clone_running//[[:space:]]/}

    # A failed query must not be read as "no clone running": unlocking a
    # container that is still being cloned is worse than retrying later.
    if ! [[ "$clone_running" =~ ^[0-9]+$ ]]; then
      log_warn " Could not determine clone status for $vmid, not unlocking"
      return 1
    fi
    if [ "$clone_running" -gt 0 ]; then
      log_info " Clone still in progress, waiting..."
      return 1
    fi

    # Clone finished but still locked - unlock it
    log_info " Unlocking container $vmid..."
    if ! "${remote[@]}" "pct unlock $vmid" 2>&1; then
      log_warn " Failed to unlock, may need manual intervention"
      return 1
    fi
    sleep 2
  fi

  # Check if rootfs exists. `grep -c` prints 0 AND exits 1 when nothing
  # matches, so the exit status is ignored and the captured text is validated
  # instead (appending a fallback "0" would produce the two-line value "0\n0").
  has_rootfs=$("${remote[@]}" "pct config $vmid 2>/dev/null | grep -c '^rootfs:'") || true
  if ! [[ "$has_rootfs" =~ ^[0-9]+$ ]]; then
    log_warn " Could not read config for $vmid"
    return 1
  fi

  if [ "$has_rootfs" -eq 0 ]; then
    # Check if logical volume exists
    lv_exists=$("${remote[@]}" "lvs | grep -c 'vm-$vmid-disk-0'") || true
    if [[ "$lv_exists" =~ ^[0-9]+$ ]] && [ "$lv_exists" -gt 0 ]; then
      log_info " Adding rootfs to $vmid..."
      # The size is a fixed default; it is not read from the source container.
      if ! "${remote[@]}" \
        "pct set $vmid --rootfs local-lvm:vm-$vmid-disk-0,size=$size" 2>&1; then
        log_warn " Failed to add rootfs"
        return 1
      fi
      log_success " Rootfs added"
    else
      log_warn " Logical volume not found for $vmid"
      return 1
    fi
  fi

  # Update network config
  if [ -n "$ip" ]; then
    log_info " Updating network config for $vmid (IP: $ip)..."
    if "${remote[@]}" \
      "pct set $vmid --net0 'name=eth0,bridge=vmbr0,firewall=1,ip=$ip/24,gw=${NETWORK_GATEWAY:-192.168.11.1}'" 2>&1; then
      log_success " Network config updated"
    else
      log_warn " Network config update failed (may already be correct)"
    fi
  fi

  log_success " Container $vmid fixed successfully"
  return 0
}
#######################################
# Check migration progress.
# Globals:
#   PROXMOX_HOST  (read) host contacted over SSH as root
#   TARGET_VMIDS  (read) VMIDs that count towards the migration
# Arguments:
#   None
# Outputs:
#   One line on stdout: "<completed>|<in_create>|<active_clones>"
#     completed     - target rows of `pct list` not containing "create"
#     in_create     - target rows of `pct list` containing "create"
#     active_clones - remote processes matching "pct clone" or "qm clone"
#   Any count that cannot be obtained is reported as 0.
#######################################
check_progress() {
  local listing counts active_clones ids
  local -a remote=(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${PROXMOX_HOST}")

  # Fetch the listing once; both container counts are derived from it locally.
  listing=$("${remote[@]}" "pct list") || listing=""

  # Compare the first column for equality so that e.g. VMID 21010 is not
  # counted as 2101 (a "^2101" prefix pattern would match both).
  printf -v ids '%s ' "${TARGET_VMIDS[@]}"
  counts=$(awk -v ids="$ids" '
    BEGIN { n = split(ids, id, " "); for (i = 1; i <= n; i++) want[id[i]] = 1 }
    ($1 in want) { if ($0 ~ /create/) creating++; else ready++ }
    END { printf "%d|%d", ready, creating }
  ' <<<"$listing")

  active_clones=$("${remote[@]}" \
    "ps aux | grep -E 'pct clone|qm clone' | grep -v grep | wc -l") || active_clones=0
  active_clones=${active_clones//[[:space:]]/}
  [[ "$active_clones" =~ ^[0-9]+$ ]] || active_clones=0

  printf '%s|%s\n' "$counts" "$active_clones"
}
# Startup banner, then the effective settings for this run.
printf '%s\n' \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "🔄 RPC Migration Monitor" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  ""
log_info "Monitoring Proxmox host: $PROXMOX_HOST"
log_info "Check interval: $CHECK_INTERVAL seconds"
log_info "Max iterations: $MAX_ITERATIONS"
printf '\n'
# Main monitoring loop: poll the host up to MAX_ITERATIONS times, try to fix
# containers stuck in "create", and exit 0 as soon as every target container
# is reported complete. Falls through to a final status report otherwise.

# Expected container count, derived from the list so the two cannot drift
# apart (a fixed literal larger than the list makes completion unreachable).
total_targets=${#TARGET_VMIDS[@]}
# ERE matching a `pct list` row whose first column is exactly a target VMID;
# the trailing space stops 2101 from also matching 21010.
vmid_regex="^($(IFS='|'; printf '%s' "${TARGET_VMIDS[*]}")) "
ssh_opts=(-o ConnectTimeout=5 -o StrictHostKeyChecking=no)

iteration=0
while [ "$iteration" -lt "$MAX_ITERATIONS" ]; do
  iteration=$((iteration + 1))
  echo ""
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  log_info "Check #$iteration - $(date '+%Y-%m-%d %H:%M:%S')"
  echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

  # Check progress
  IFS='|' read -r completed in_create active_clones <<< "$(check_progress)"
  # Guard the integer tests below against an empty field.
  completed=${completed:-0}
  in_create=${in_create:-0}
  active_clones=${active_clones:-0}
  log_info "Progress: $completed/$total_targets completed, $in_create in create status, $active_clones active clones"

  # Find containers in create status
  containers_in_create=$(ssh "${ssh_opts[@]}" root@"$PROXMOX_HOST" \
    "pct list | grep -E '$vmid_regex' | grep 'create' | awk '{print \$1}'" || echo "")
  if [ -n "$containers_in_create" ]; then
    echo ""
    log_info "Found containers in create status, attempting to fix..."
    # Deliberately unquoted: the value is a whitespace-separated VMID list.
    for vmid in $containers_in_create; do
      # fix_container returns 1 for "not ready yet"; without the guard that
      # status would terminate the whole monitor under `set -e`.
      fix_container "$vmid" || log_warn "Container $vmid not fixed yet, will retry on next check"
    done
  fi

  # If no active clones and migration script not running, try to continue
  if [ "$active_clones" -eq 0 ]; then
    script_running=$(ssh "${ssh_opts[@]}" root@"$PROXMOX_HOST" \
      "ps aux | grep 'migrate-rpc-vmids' | grep -v grep | wc -l" || echo "0")
    if [ "$script_running" -eq 0 ] && [ "$completed" -lt "$total_targets" ]; then
      log_info "No active clones, checking if we should continue migration..."
      # Check if there are containers that need fixing first
      needs_fixing=0
      for vmid in "${TARGET_VMIDS[@]}"; do
        status=$(ssh "${ssh_opts[@]}" root@"$PROXMOX_HOST" \
          "pct list | grep '^$vmid ' | awk '{print \$3}'" 2>/dev/null || echo "")
        if [ "$status" = "create" ]; then
          needs_fixing=1
          break
        fi
      done
      if [ "$needs_fixing" -eq 0 ]; then
        log_info "All containers fixed, migration can continue"
      fi
    fi
  fi

  # Check if migration is complete
  if [ "$completed" -ge "$total_targets" ] && [ "$in_create" -eq 0 ] && [ "$active_clones" -eq 0 ]; then
    log_success "Migration complete! All $total_targets containers created and configured."
    echo ""
    ssh "${ssh_opts[@]}" root@"$PROXMOX_HOST" \
      "pct list | grep -E '$vmid_regex'"
    exit 0
  fi

  # Wait before next check
  if [ "$iteration" -lt "$MAX_ITERATIONS" ]; then
    log_info "Waiting $CHECK_INTERVAL seconds before next check..."
    sleep "$CHECK_INTERVAL"
  fi
done

log_warn "Reached maximum iterations. Migration may still be in progress."
echo ""
log_info "Final status:"
check_progress | awk -F'|' -v total="$total_targets" \
  '{print " Completed: "$1"/"total"\n In create: "$2"\n Active clones: "$3}'
echo ""