#!/usr/bin/env bash
# Complete All Remaining Migrations
# 1. Migrate 7 containers from ml110 to r630-02 using backup/restore method
# 2. Migrate containers from thin2 to free storage on r630-02
#
# Output is printed to the terminal and appended to a timestamped log under
# <project root>/reports/status/.
#
# NOTE(review): the copy of this script under review had its newlines collapsed
# and its heredoc bodies partially stripped. The remote vzdump / pct restore
# commands below were reconstructed from the surviving fragments and the log
# messages -- confirm them against version control before running.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
REPORT_DIR="${PROJECT_ROOT}/reports/status"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
COMPLETE_LOG="${REPORT_DIR}/complete_migrations_${TIMESTAMP}.log"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m'

# Logging helpers: each prints one coloured line and appends it to COMPLETE_LOG.
log_info() { echo -e "${BLUE}[INFO]${NC} $1" | tee -a "$COMPLETE_LOG"; }
log_success() { echo -e "${GREEN}[✓]${NC} $1" | tee -a "$COMPLETE_LOG"; }
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1" | tee -a "$COMPLETE_LOG"; }
log_error() { echo -e "${RED}[✗]${NC} $1" | tee -a "$COMPLETE_LOG"; }
log_header() { echo -e "${CYAN}=== $1 ===${NC}" | tee -a "$COMPLETE_LOG"; }
log_section() { echo -e "\n${MAGENTA}>>> $1 <<<${NC}\n" | tee -a "$COMPLETE_LOG"; }

mkdir -p "$REPORT_DIR"

# Node table: hostname -> "ip:password".
# SECURITY NOTE(review): root passwords are hard-coded here. The defaults are
# kept so existing invocations still work, but they should be rotated and
# supplied via ML110_PASSWORD / R630_02_PASSWORD (or replaced by SSH keys).
declare -A NODES
NODES[ml110]="192.168.11.10:${ML110_PASSWORD:-L@kers2010}"
NODES[r630-02]="192.168.11.12:${R630_02_PASSWORD:-password}"

#######################################
# Run a command on a node as root over SSH.
# Globals:   NODES (read)
# Arguments: $1 - node hostname (key of NODES); remaining args - remote command
# Outputs:   whatever the remote command writes
# Returns:   exit status of ssh (the remote command's status, or 255 on
#            connection failure)
#######################################
ssh_node() {
  local hostname="$1"
  shift
  local ip="${NODES[$hostname]%%:*}"
  local password="${NODES[$hostname]#*:}"
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=10)

  if command -v sshpass >/dev/null 2>&1; then
    # -e reads the password from $SSHPASS so it does not show up in `ps` output.
    SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  else
    ssh "${ssh_opts[@]}" "root@${ip}" "$@"
  fi
}

#######################################
# Check that a node answers a single ping within 5 seconds.
# Globals:   NODES (read)
# Arguments: $1 - node hostname
# Returns:   0 if reachable, non-zero otherwise
#######################################
check_node() {
  local hostname="$1"
  local ip="${NODES[$hostname]%%:*}"
  ping -c 1 -W 5 "$ip" >/dev/null 2>&1
}

#######################################
# Print the `pct status` state of a container, or "not_found".
# The remote pipeline ends in awk, so it exits 0 with empty output when the
# container does not exist; empty output is therefore mapped to "not_found"
# as well as an ssh failure.
# Arguments: $1 - node hostname, $2 - container VMID
# Outputs:   status word (e.g. running/stopped) or "not_found" on stdout
#######################################
ct_status() {
  local node=$1
  local vmid=$2
  local state
  state=$(ssh_node "$node" "pct status $vmid 2>/dev/null | awk '{print \$2}'") || state=""
  printf '%s\n' "${state:-not_found}"
}

#######################################
# Migrate container using backup/restore method.
# Steps: vzdump on source -> scp to target -> destroy on source ->
# pct restore on target with the requested storage -> start -> verify.
# Arguments: $1 - VMID, $2 - container name (for logging),
#            $3 - source node, $4 - target node, $5 - target storage
# Returns:   0 when the container is present on the target afterwards,
#            1 on any failed step
#######################################
migrate_via_backup() {
  local vmid=$1
  local name=$2
  local source_node=$3
  local target_node=$4
  local target_storage=$5

  log_info "========================================="
  log_info "Migrating CT $vmid ($name)"
  log_info "From: $source_node -> To: $target_node"
  log_info "Target Storage: $target_storage"
  log_info "========================================="

  # Step 1: Create backup on source node
  log_info "Step 1: Creating backup of container $vmid on $source_node..."
  log_warn "This may take 5-15 minutes depending on container size..."

  # Check container status
  local status
  status=$(ct_status "$source_node" "$vmid")
  if [[ "$status" == "not_found" ]]; then
    log_error "Container $vmid not found on $source_node"
    return 1
  fi
  log_info "Container status: $status"

  # Create backup to local storage (always available).
  # NOTE(review): the best-effort command that preceded vzdump was lost from
  # the source; ensuring the dump directory exists is a harmless stand-in
  # (vzdump --mode stop stops the container itself).
  local backup_result backup_rc=0
  backup_result=$(ssh_node "$source_node" \
    "mkdir -p /var/lib/vz/dump 2>/dev/null || true; vzdump $vmid --storage local --compress gzip --mode stop --remove 0 2>&1") ||
    backup_rc=$?
  # Fail on a non-zero exit status as well as on error text in the output.
  if ((backup_rc != 0)) || grep -q "error\|Error\|ERROR\|failed" <<<"$backup_result"; then
    log_error "Backup failed: $backup_result"
    return 1
  fi
  log_success "Backup completed successfully"

  # Find backup file (newest matching archive)
  local backup_file
  backup_file=$(ssh_node "$source_node" \
    "ls -t /var/lib/vz/dump/vzdump-lxc-$vmid-*.tar.gz 2>/dev/null | head -1") || backup_file=""
  if [[ -z "$backup_file" ]]; then
    log_error "Could not find backup file"
    return 1
  fi
  log_info "Backup file: $backup_file"

  # Get backup filename for transfer
  local backup_name=${backup_file##*/}

  # Step 2: Copy backup to target node
  log_info "Step 2: Copying backup to $target_node..."
  local source_ip="${NODES[$source_node]%%:*}"
  local target_ip="${NODES[$target_node]%%:*}"
  local source_password="${NODES[$source_node]#*:}"

  # Prepare target directory
  ssh_node "$target_node" "mkdir -p /var/lib/vz/dump" || true

  # Copy backup file using scp (remote-to-remote). Only the source password is
  # supplied; presumably the hosts trust each other for the second hop --
  # TODO confirm.
  local -a scp_cmd=(
    scp -o StrictHostKeyChecking=no -o ConnectTimeout=30
    "root@${source_ip}:${backup_file}"
    "root@${target_ip}:/var/lib/vz/dump/${backup_name}"
  )
  local scp_output scp_rc=0
  if command -v sshpass >/dev/null 2>&1; then
    scp_output=$(SSHPASS="$source_password" sshpass -e "${scp_cmd[@]}" 2>&1) || scp_rc=$?
  else
    scp_output=$("${scp_cmd[@]}" 2>&1) || scp_rc=$?
  fi
  if [[ -n "$scp_output" ]]; then
    printf '%s\n' "$scp_output" | tee -a "$COMPLETE_LOG"
  fi
  # The scp status is captured directly rather than read from PIPESTATUS after
  # the enclosing if/fi, where it is no longer reliable.
  if ((scp_rc != 0)); then
    log_error "Failed to copy backup file"
    return 1
  fi
  log_success "Backup copied to $target_node"

  # Step 3: Destroy container on source (required before restore)
  log_info "Step 3: Destroying container on source node (required for restore)..."
  local destroy_result destroy_rc=0
  destroy_result=$(ssh_node "$source_node" "pct destroy $vmid --force 2>&1") || destroy_rc=$?
  if ((destroy_rc != 0)) || grep -q "error\|Error" <<<"$destroy_result"; then
    # Best-effort: a failed destroy is only a warning, the restore still runs.
    log_warn "Destroy failed (container may not exist): ${destroy_result:-destroy failed}"
  else
    log_success "Container destroyed on source node"
  fi
  sleep 3

  # Step 4: Restore container on target with specified storage
  log_info "Step 4: Restoring container on $target_node with storage $target_storage..."
  # NOTE(review): restore command reconstructed -- the original heredoc body
  # was lost; confirm the exact pct restore options.
  local restore_result restore_rc=0
  restore_result=$(ssh_node "$target_node" \
    "pct restore $vmid /var/lib/vz/dump/$backup_name --storage $target_storage 2>&1") ||
    restore_rc=$?
  if ((restore_rc != 0)) || grep -q "error\|Error\|ERROR\|failed" <<<"$restore_result"; then
    log_error "Restore failed: $restore_result"
    log_warn "Container may need manual restoration"
    return 1
  fi
  log_success "Container restored successfully on $target_node"

  # Step 5: Start container
  log_info "Step 5: Starting container on $target_node..."
  local start_result start_rc=0
  start_result=$(ssh_node "$target_node" "pct start $vmid 2>&1") || start_rc=$?
  if ((start_rc != 0)) || grep -q "error\|Error" <<<"$start_result"; then
    log_warn "Start failed (may already be running): ${start_result:-start failed}"
  else
    log_success "Container started successfully"
  fi

  # Step 6: Verify
  log_info "Step 6: Verifying migration..."
  sleep 5
  local verify_status
  verify_status=$(ct_status "$target_node" "$vmid")
  if [[ "$verify_status" == "not_found" ]]; then
    log_error "Container not found on target node after migration"
    return 1
  fi
  log_success "Container $vmid is now on $target_node (status: $verify_status)"

  # Verify storage: the rootfs line looks like
  # "rootfs: <storage>:<volume>,size=..." (optionally with a "volume=" prefix),
  # so the storage ID is the text before the first colon of the value.
  local rootfs_line actual_storage
  rootfs_line=$(ssh_node "$target_node" "pct config $vmid 2>/dev/null | grep '^rootfs:'") ||
    rootfs_line=""
  actual_storage=${rootfs_line#rootfs:}
  actual_storage=${actual_storage# }
  actual_storage=${actual_storage#volume=}
  actual_storage=${actual_storage%%:*}
  log_info "Container storage: $actual_storage (expected: $target_storage)"
  return 0
}

#######################################
# Migrate container within same node (storage change).
# Steps: stop -> vzdump to local storage -> destroy -> pct restore onto the
# target storage -> start.
# Arguments: $1 - VMID, $2 - container name (for logging),
#            $3 - node, $4 - target storage
# Returns:   0 on success, 1 if the container is missing or backup/restore fails
#######################################
migrate_storage_same_node() {
  local vmid=$1
  local name=$2
  local node=$3
  local target_storage=$4

  log_info "========================================="
  log_info "Migrating CT $vmid ($name) storage on $node"
  log_info "Target Storage: $target_storage"
  log_info "========================================="

  # Check if container exists
  local status
  status=$(ct_status "$node" "$vmid")
  if [[ "$status" == "not_found" ]]; then
    log_error "Container $vmid not found on $node"
    return 1
  fi

  # Stop container
  log_info "Stopping container for storage migration..."
  ssh_node "$node" "pct stop $vmid" 2>&1 || log_warn "Stop failed (may already be stopped)"
  sleep 3

  # Create backup
  log_info "Creating backup for storage migration..."
  # NOTE(review): vzdump options reconstructed from the cross-node backup step;
  # the original heredoc body was lost -- confirm.
  local backup_result backup_rc=0
  backup_result=$(ssh_node "$node" \
    "vzdump $vmid --storage local --compress gzip --mode stop --remove 0 2>&1") ||
    backup_rc=$?
  if ((backup_rc != 0)) || grep -q "error\|Error\|ERROR\|failed" <<<"$backup_result"; then
    log_error "Backup failed: $backup_result"
    return 1
  fi

  # Find backup file
  local backup_file
  backup_file=$(ssh_node "$node" \
    "ls -t /var/lib/vz/dump/vzdump-lxc-$vmid-*.tar.gz 2>/dev/null | head -1") || backup_file=""
  if [[ -z "$backup_file" ]]; then
    log_error "Could not find backup file"
    return 1
  fi
  local backup_name=${backup_file##*/}

  # Destroy container
  log_info "Destroying container for storage change..."
  ssh_node "$node" "pct destroy $vmid --force" 2>&1 || log_warn "Destroy failed"
  sleep 3

  # Restore with new storage
  log_info "Restoring container with new storage $target_storage..."
  # NOTE(review): restore command reconstructed -- confirm the exact options.
  local restore_result restore_rc=0
  restore_result=$(ssh_node "$node" \
    "pct restore $vmid /var/lib/vz/dump/$backup_name --storage $target_storage 2>&1") ||
    restore_rc=$?
  if ((restore_rc != 0)) || grep -q "error\|Error\|ERROR\|failed" <<<"$restore_result"; then
    log_error "Restore failed: $restore_result"
    return 1
  fi
  log_success "Container restored with new storage"

  # Start container
  log_info "Starting container..."
  ssh_node "$node" "pct start $vmid" 2>&1 || log_warn "Start failed"

  log_success "Storage migration complete"
  return 0
}

#######################################
# Main execution: migrate the ml110 containers to r630-02, move CT 5000 and
# CT 6200 off thin2, then log a per-node summary.
# Returns: 0 when every migration succeeded, 1 if a node is unreachable or
#          any migration failed
#######################################
main() {
  log_header "Completing All Remaining Migrations"
  echo "Log file: $COMPLETE_LOG" | tee -a "$COMPLETE_LOG"
  echo "Timestamp: $(date)" | tee -a "$COMPLETE_LOG"
  echo "" | tee -a "$COMPLETE_LOG"

  local source_node="ml110"
  local target_node="r630-02"
  local target_storage="thin1-r630-02" # Best available storage

  if ! check_node "$source_node" || ! check_node "$target_node"; then
    log_error "One or more nodes are not reachable"
    return 1
  fi

  # Containers to migrate from ml110 to r630-02
  log_section "Migrating Containers from ml110 to r630-02"
  local -A containers=(
    [1003]="besu-validator-4"
    [1004]="besu-validator-5"
    [1503]="besu-sentry-4"
    [1504]="besu-sentry-ali"
    [2201]="besu-rpc-public-1"
    [2303]="besu-rpc-ali-0x8a"
    [2401]="besu-rpc-thirdweb-0x8a-1"
  )

  local success_count=0
  local fail_count=0
  local vmid name

  for vmid in "${!containers[@]}"; do
    name="${containers[$vmid]}"
    if migrate_via_backup "$vmid" "$name" "$source_node" "$target_node" "$target_storage"; then
      # Plain assignment: ((success_count++)) returns status 1 when the counter
      # is 0, which aborts the whole script under `set -e`.
      success_count=$((success_count + 1))
      log_info "Waiting 10 seconds before next migration..."
      sleep 10
    else
      fail_count=$((fail_count + 1))
      log_error "Failed to migrate CT $vmid"
    fi
  done

  log_section "Migration Summary (ml110 -> r630-02)"
  log_info "Successfully migrated: $success_count containers"
  log_info "Failed: $fail_count containers"

  # Fix thin2 capacity issue
  log_section "Fixing thin2 Capacity Issue"

  # Migrate containers from thin2 to free storage
  local thin2_target="thin1-r630-02" # Use same storage as above
  local thin2_fail_count=0

  for vmid in 5000 6200; do
    name=""
    case "$vmid" in
      5000)
        name="blockscout-1"
        ;;
      6200)
        # Look the hostname up on the node; fall back to a synthetic name.
        name=$(ssh_node "$target_node" \
          "pct config $vmid 2>/dev/null | grep '^hostname:' | cut -d: -f2 | xargs") ||
          name=""
        name=${name:-CT-$vmid}
        ;;
    esac

    if migrate_storage_same_node "$vmid" "$name" "$target_node" "$thin2_target"; then
      log_success "CT $vmid migrated off thin2"
      sleep 5
    else
      thin2_fail_count=$((thin2_fail_count + 1))
      log_error "Failed to migrate CT $vmid from thin2"
    fi
  done

  # Final verification
  log_section "Final System State Verification"
  local hostname container_count running_count cpu_usage thin2_usage

  for hostname in "$source_node" "$target_node"; do
    if ! check_node "$hostname"; then
      continue
    fi

    container_count=$(ssh_node "$hostname" "pct list 2>/dev/null | tail -n +2 | wc -l") ||
      container_count="0"
    running_count=$(ssh_node "$hostname" "pct list 2>/dev/null | grep running | wc -l") ||
      running_count="0"
    cpu_usage=$(ssh_node "$hostname" \
      "top -bn1 2>/dev/null | grep 'Cpu(s)' | awk '{print \$2}' | sed 's/%us,//' || echo 'N/A'") ||
      cpu_usage="N/A"
    log_info "$hostname: $container_count containers ($running_count running), CPU: $cpu_usage%"

    # Check thin2 usage on r630-02
    if [[ "$hostname" == "r630-02" ]]; then
      thin2_usage=$(ssh_node "$hostname" \
        "pvesm status 2>/dev/null | grep '^thin2' | awk '{print \$5}'") ||
        thin2_usage="N/A"
      log_info "  thin2 storage usage: $thin2_usage"
    fi
  done

  # Do not claim success when any migration failed.
  local total_failures=$((fail_count + thin2_fail_count))
  if ((total_failures > 0)); then
    log_header "Migrations Finished With Failures"
    log_info "Full log saved to: $COMPLETE_LOG"
    log_error "$total_failures migration(s) failed; review the log before retrying"
    return 1
  fi

  log_header "All Migrations Complete"
  log_info "Full log saved to: $COMPLETE_LOG"
  log_success "All remaining migrations have been completed!"
}

main "$@"