379 lines
13 KiB
Bash
379 lines
13 KiB
Bash
|
|
#!/usr/bin/env bash
#
# Complete All Remaining Migrations
#
#   1. Migrate 7 containers from ml110 to r630-02 using the
#      backup/restore method.
#   2. Migrate containers off thin2 to free storage on r630-02.

# Strict mode: abort on errors, unset variables, and pipeline failures.
set -euo pipefail
# Load IP configuration and derive the report/log paths.
# (The original computed SCRIPT_DIR and PROJECT_ROOT twice; the duplicate
# assignments are removed — the values are identical.)
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Optional overrides (e.g. PROXMOX_HOST_*); a missing config file is
# tolerated deliberately — defaults are supplied where the values are used.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

REPORT_DIR="${PROJECT_ROOT}/reports/status"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
COMPLETE_LOG="${REPORT_DIR}/complete_migrations_${TIMESTAMP}.log"
|
# ANSI color escape sequences for console output (interpreted by the
# `echo -e` in the log_* helpers). Declared readonly: they are constants
# and are never reassigned anywhere in this script.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly MAGENTA='\033[0;35m'
readonly NC='\033[0m' # reset to default
|
# Logging helpers. Every message is printed to stdout and appended to
# $COMPLETE_LOG. _emit centralizes the echo|tee plumbing; the public
# helpers only differ in tag and color.
_emit() { echo -e "$1" | tee -a "$COMPLETE_LOG"; }

log_info()    { _emit "${BLUE}[INFO]${NC} $1"; }
log_success() { _emit "${GREEN}[✓]${NC} $1"; }
log_warn()    { _emit "${YELLOW}[⚠]${NC} $1"; }
log_error()   { _emit "${RED}[✗]${NC} $1"; }
log_header()  { _emit "${CYAN}=== $1 ===${NC}"; }
log_section() { _emit "\n${MAGENTA}>>> $1 <<<${NC}\n"; }
|
# Ensure the status-report directory exists before the first log_* call
# tees output into $COMPLETE_LOG.
mkdir -p "$REPORT_DIR"
|
# Node registry: each entry is "host_ip:root_password" for a Proxmox node.
# IPs and passwords can be overridden via the environment (or the sourced
# config/ip-addresses.conf); the defaults below preserve the original
# values for backward compatibility.
# NOTE(review): hardcoding root passwords in the script is a security
# risk (visible in version control and, via sshpass, in `ps` output) —
# move the credentials fully into the config file or switch to SSH keys.
declare -A NODES
NODES[ml110]="${PROXMOX_HOST_ML110:-192.168.11.10}:${PROXMOX_PASS_ML110:-L@kers2010}"
NODES[r630-02]="${PROXMOX_HOST_R630_02:-192.168.11.12}:${PROXMOX_PASS_R630_02:-password}"
|
# Run a command as root on a node (looked up by NODES key) over SSH.
# Uses sshpass for password authentication when it is installed;
# otherwise falls back to plain ssh (key-based or interactive auth).
ssh_node() {
    local node=$1
    shift
    local addr="${NODES[$node]%%:*}"
    local pass="${NODES[$node]#*:}"
    local -a opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=10)

    if command -v sshpass >/dev/null 2>&1; then
        sshpass -p "$pass" ssh "${opts[@]}" root@"$addr" "$@"
    else
        ssh "${opts[@]}" root@"$addr" "$@"
    fi
}
|
# Return 0 iff the node (looked up by NODES key) answers one ICMP ping
# within 5 seconds; all ping output is discarded.
check_node() {
    local addr="${NODES[$1]%%:*}"
    ping -c 1 -W 5 "$addr" >/dev/null 2>&1
}
|
# Migrate an LXC container between nodes via vzdump backup + scp +
# pct restore (used instead of direct cluster migration).
#
# Arguments:
#   $1 vmid            container ID
#   $2 name            container name (logging only)
#   $3 source_node     NODES key of the source host
#   $4 target_node     NODES key of the target host
#   $5 target_storage  storage ID to restore the rootfs onto
# Returns:
#   0 when the container is verified on the target, 1 on any failure.
# NOTE: `vzdump --mode stop` means the container is down for the whole
# backup/copy/restore window.
migrate_via_backup() {
    local vmid=$1
    local name=$2
    local source_node=$3
    local target_node=$4
    local target_storage=$5

    log_info "========================================="
    log_info "Migrating CT $vmid ($name)"
    log_info "From: $source_node -> To: $target_node"
    log_info "Target Storage: $target_storage"
    log_info "========================================="

    # Step 1: Create backup on source node
    log_info "Step 1: Creating backup of container $vmid on $source_node..."
    log_warn "This may take 5-15 minutes depending on container size..."

    # Declarations are separated from assignments throughout so `local`
    # does not mask the remote command's exit status (SC2155).
    local status
    status=$(ssh_node "$source_node" "pct status $vmid 2>/dev/null | awk '{print \$2}'" || echo "not_found")
    if [ "$status" = "not_found" ]; then
        log_error "Container $vmid not found on $source_node"
        return 1
    fi

    log_info "Container status: $status"

    # Create backup to local storage (always available). The unquoted
    # heredoc expands $vmid locally before the script is sent to the node.
    # The trailing `|| true` preserves the original behavior of deciding
    # success from the captured output, not the exit status.
    local backup_result
    backup_result=$(ssh_node "$source_node" bash <<ENDSSH
# Create backup directory if needed
mkdir -p /var/lib/vz/dump 2>/dev/null || true

# Create backup
vzdump $vmid \\
    --storage local \\
    --compress gzip \\
    --mode stop \\
    --remove 0 2>&1
ENDSSH
) || true

    # NOTE(review): substring matching on output can false-positive (e.g.
    # a path containing "error"); vzdump's exit status would be a more
    # reliable signal.
    if echo "$backup_result" | grep -q "error\|Error\|ERROR\|failed"; then
        log_error "Backup failed: $backup_result"
        return 1
    fi

    log_success "Backup completed successfully"

    # Find the newest backup archive for this vmid
    local backup_file
    backup_file=$(ssh_node "$source_node" "ls -t /var/lib/vz/dump/vzdump-lxc-$vmid-*.tar.gz 2>/dev/null | head -1" || echo "")
    if [ -z "$backup_file" ]; then
        log_error "Could not find backup file"
        return 1
    fi

    log_info "Backup file: $backup_file"

    local backup_name
    backup_name=$(basename "$backup_file")

    # Step 2: Copy backup to target node
    log_info "Step 2: Copying backup to $target_node..."

    local source_ip="${NODES[$source_node]%%:*}"
    local target_ip="${NODES[$target_node]%%:*}"
    local source_password="${NODES[$source_node]#*:}"

    # Prepare target directory
    ssh_node "$target_node" "mkdir -p /var/lib/vz/dump" || true

    # Copy the archive node-to-node with scp, capturing the status inline.
    # BUG FIX: the original tested ${PIPESTATUS[0]} only after the if/fi,
    # but under `set -euo pipefail` a failing scp|tee pipeline aborts the
    # script before that check could ever run.
    local scp_rc=0
    if command -v sshpass >/dev/null 2>&1; then
        sshpass -p "$source_password" scp -o StrictHostKeyChecking=no -o ConnectTimeout=30 \
            root@"$source_ip:$backup_file" \
            root@"$target_ip:/var/lib/vz/dump/$backup_name" 2>&1 | tee -a "$COMPLETE_LOG" || scp_rc=${PIPESTATUS[0]}
    else
        scp -o StrictHostKeyChecking=no -o ConnectTimeout=30 \
            root@"$source_ip:$backup_file" \
            root@"$target_ip:/var/lib/vz/dump/$backup_name" 2>&1 | tee -a "$COMPLETE_LOG" || scp_rc=${PIPESTATUS[0]}
    fi

    if [ "$scp_rc" -ne 0 ]; then
        log_error "Failed to copy backup file"
        return 1
    fi

    log_success "Backup copied to $target_node"

    # Step 3: Destroy container on source (required before restore so the
    # vmid is free on the target)
    log_info "Step 3: Destroying container on source node (required for restore)..."

    local destroy_result
    destroy_result=$(ssh_node "$source_node" "pct destroy $vmid --force 2>&1" || echo "destroy failed")
    if echo "$destroy_result" | grep -q "error\|Error"; then
        log_warn "Destroy failed (container may not exist): $destroy_result"
    else
        log_success "Container destroyed on source node"
    fi

    sleep 3

    # Step 4: Restore container on target with the requested storage
    log_info "Step 4: Restoring container on $target_node with storage $target_storage..."

    local restore_result
    restore_result=$(ssh_node "$target_node" bash <<ENDSSH
pct restore $vmid /var/lib/vz/dump/$backup_name \\
    --storage $target_storage 2>&1
ENDSSH
) || true

    if echo "$restore_result" | grep -q "error\|Error\|ERROR\|failed"; then
        log_error "Restore failed: $restore_result"
        log_warn "Container may need manual restoration"
        return 1
    fi

    log_success "Container restored successfully on $target_node"

    # Step 5: Start container
    log_info "Step 5: Starting container on $target_node..."

    local start_result
    start_result=$(ssh_node "$target_node" "pct start $vmid 2>&1" || echo "start failed")
    if echo "$start_result" | grep -q "error\|Error"; then
        log_warn "Start failed (may already be running): $start_result"
    else
        log_success "Container started successfully"
    fi

    # Step 6: Verify the container now exists on the target
    log_info "Step 6: Verifying migration..."
    sleep 5

    local verify_status
    verify_status=$(ssh_node "$target_node" "pct status $vmid 2>/dev/null | awk '{print \$2}'" || echo "not_found")
    if [ "$verify_status" != "not_found" ]; then
        log_success "Container $vmid is now on $target_node (status: $verify_status)"

        # Report which storage the rootfs actually landed on
        local actual_storage
        actual_storage=$(ssh_node "$target_node" "pct config $vmid 2>/dev/null | grep '^rootfs:' | grep -o 'storage=[^,]*' | cut -d= -f2" || echo "")
        log_info "Container storage: $actual_storage (expected: $target_storage)"

        return 0
    else
        log_error "Container not found on target node after migration"
        return 1
    fi
}
||
|
|
# Migrate a container's rootfs to a different storage on the SAME node by
# backing it up, destroying it, and restoring it onto the new storage.
#
# Arguments:
#   $1 vmid            container ID
#   $2 name            container name (logging only)
#   $3 node            NODES key of the host
#   $4 target_storage  storage ID to restore onto
# Returns:
#   0 on success, 1 on failure.
# NOTE(review): destructive sequence — between `pct destroy` and a
# successful restore, the only copy of the container is the vzdump
# archive under /var/lib/vz/dump on the node.
migrate_storage_same_node() {
    local vmid=$1
    local name=$2
    local node=$3
    local target_storage=$4

    log_info "========================================="
    log_info "Migrating CT $vmid ($name) storage on $node"
    log_info "Target Storage: $target_storage"
    log_info "========================================="

    # Check if container exists. The inner `|| echo` supplies a sentinel
    # when the remote query fails; `local x=$(cmd)` masks the exit status
    # itself (SC2155), so only the output is inspected.
    local status=$(ssh_node "$node" "pct status $vmid 2>/dev/null | awk '{print \$2}'" || echo "not_found")
    if [ "$status" = "not_found" ]; then
        log_error "Container $vmid not found on $node"
        return 1
    fi

    # Stop container (the backup/restore cycle needs it offline)
    log_info "Stopping container for storage migration..."
    ssh_node "$node" "pct stop $vmid" 2>&1 || log_warn "Stop failed (may already be stopped)"
    sleep 3

    # Create backup on the node's `local` storage. The unquoted heredoc
    # expands $vmid locally before the script is sent to the node.
    log_info "Creating backup for storage migration..."
    local backup_result=$(ssh_node "$node" bash <<ENDSSH
vzdump $vmid \\
--storage local \\
--compress gzip \\
--mode stop \\
--remove 0 2>&1
ENDSSH
)

    # NOTE(review): substring matching on the output can false-positive
    # (e.g. a filename containing "error"); vzdump's exit status would be
    # a more reliable failure signal.
    if echo "$backup_result" | grep -q "error\|Error\|ERROR\|failed"; then
        log_error "Backup failed: $backup_result"
        return 1
    fi

    # Find backup file (newest archive matching this vmid)
    local backup_file=$(ssh_node "$node" "ls -t /var/lib/vz/dump/vzdump-lxc-$vmid-*.tar.gz 2>/dev/null | head -1" || echo "")
    if [ -z "$backup_file" ]; then
        log_error "Could not find backup file"
        return 1
    fi

    local backup_name=$(basename "$backup_file")

    # Destroy container so the same vmid can be restored on new storage
    log_info "Destroying container for storage change..."
    ssh_node "$node" "pct destroy $vmid --force" 2>&1 || log_warn "Destroy failed"
    sleep 3

    # Restore with new storage
    log_info "Restoring container with new storage $target_storage..."
    local restore_result=$(ssh_node "$node" bash <<ENDSSH
pct restore $vmid /var/lib/vz/dump/$backup_name \\
--storage $target_storage 2>&1
ENDSSH
)

    if echo "$restore_result" | grep -q "error\|Error\|ERROR\|failed"; then
        log_error "Restore failed: $restore_result"
        return 1
    fi

    log_success "Container restored with new storage"

    # Start container (best effort; failure is logged, not fatal)
    log_info "Starting container..."
    ssh_node "$node" "pct start $vmid" 2>&1 || log_warn "Start failed"

    log_success "Storage migration complete"
    return 0
}
||
|
|
# Main execution: migrate the listed containers from ml110 to r630-02,
# then move the thin2-resident containers onto thin1-r630-02, then print
# a final per-node verification summary.
main() {
    log_header "Completing All Remaining Migrations"
    echo "Log file: $COMPLETE_LOG" | tee -a "$COMPLETE_LOG"
    echo "Timestamp: $(date)" | tee -a "$COMPLETE_LOG"
    echo "" | tee -a "$COMPLETE_LOG"

    local source_node="ml110"
    local target_node="r630-02"
    local target_storage="thin1-r630-02" # Best available storage

    if ! check_node "$source_node" || ! check_node "$target_node"; then
        log_error "One or more nodes are not reachable"
        return 1
    fi

    # Containers to migrate from ml110 to r630-02
    log_section "Migrating Containers from ml110 to r630-02"

    # `declare` inside a function is function-local in bash.
    declare -A containers
    containers[1003]="besu-validator-4"
    containers[1004]="besu-validator-5"
    containers[1503]="besu-sentry-4"
    containers[1504]="besu-sentry-ali"
    containers[2201]="besu-rpc-public-1"
    containers[2303]="besu-rpc-ali-0x8a"
    containers[2401]="besu-rpc-thirdweb-0x8a-1"

    local success_count=0
    local fail_count=0

    # NOTE: associative-array iteration order is unspecified in bash.
    for vmid in "${!containers[@]}"; do
        local name="${containers[$vmid]}"

        if migrate_via_backup "$vmid" "$name" "$source_node" "$target_node" "$target_storage"; then
            # BUG FIX: ((success_count++)) evaluates to 0 on the first
            # increment, returning status 1 and aborting the script under
            # `set -e`; a plain arithmetic assignment always succeeds.
            success_count=$((success_count + 1))
            log_info "Waiting 10 seconds before next migration..."
            sleep 10
        else
            fail_count=$((fail_count + 1))
            log_error "Failed to migrate CT $vmid"
        fi
    done

    log_section "Migration Summary (ml110 -> r630-02)"
    log_info "Successfully migrated: $success_count containers"
    log_info "Failed: $fail_count containers"

    # Fix thin2 capacity issue: migrate containers from thin2 to free storage
    log_section "Fixing thin2 Capacity Issue"

    local thin2_target="thin1-r630-02" # Use same storage as above

    for vmid in 5000 6200; do
        local name=""
        if [ "$vmid" = "5000" ]; then
            name="blockscout-1"
        elif [ "$vmid" = "6200" ]; then
            # Look the name up from the container's configured hostname.
            name=$(ssh_node "$target_node" "pct config $vmid 2>/dev/null | grep '^hostname:' | cut -d: -f2 | xargs" || echo "CT-$vmid")
        fi

        if migrate_storage_same_node "$vmid" "$name" "$target_node" "$thin2_target"; then
            log_success "CT $vmid migrated off thin2"
            sleep 5
        else
            log_error "Failed to migrate CT $vmid from thin2"
        fi
    done

    # Final verification
    log_section "Final System State Verification"

    for hostname in "$source_node" "$target_node"; do
        if check_node "$hostname"; then
            # Declarations split from assignments so `local` does not
            # mask the remote commands' exit statuses (SC2155).
            local container_count running_count cpu_usage
            container_count=$(ssh_node "$hostname" "pct list 2>/dev/null | tail -n +2 | wc -l" || echo "0")
            running_count=$(ssh_node "$hostname" "pct list 2>/dev/null | grep running | wc -l" || echo "0")
            # NOTE(review): here the `|| echo 'N/A'` fallback runs on the
            # remote side (it is inside the quoted remote command).
            cpu_usage=$(ssh_node "$hostname" "top -bn1 2>/dev/null | grep 'Cpu(s)' | awk '{print \$2}' | sed 's/%us,//' || echo 'N/A'")

            log_info "$hostname: $container_count containers ($running_count running), CPU: $cpu_usage%"

            # Check thin2 usage on r630-02
            if [ "$hostname" = "r630-02" ]; then
                local thin2_usage
                thin2_usage=$(ssh_node "$hostname" "pvesm status 2>/dev/null | grep '^thin2' | awk '{print \$5}'" || echo "N/A")
                log_info "  thin2 storage usage: $thin2_usage"
            fi
        fi
    done

    log_header "All Migrations Complete"
    log_info "Full log saved to: $COMPLETE_LOG"
    log_success "All remaining migrations have been completed!"
}
||
|
|
# Script entry point.
main "$@"