#!/usr/bin/env bash
# Start all stopped containers on pve2 that were reported as failed on r630-02.
#
# Usage: ./scripts/start-containers-on-pve2.sh
#
# Requires:
#   PROXMOX_HOST_R630_01  address of the pve2 host; normally provided by
#                         config/ip-addresses.conf in the project root.
#   Root SSH access to that host.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Load IP configuration. A missing file is tolerated here because the variable
# may already be exported by the caller; the check below catches the case
# where it is defined nowhere.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Configuration
PVE2_IP="${PROXMOX_HOST_R630_01:?PROXMOX_HOST_R630_01 is not set (expected from config/ip-addresses.conf)}"
PVE2_NAME="pve2"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
log_error() { echo -e "${RED}[ERROR]${NC} $1"; }
# Prints a horizontal rule; takes no arguments.
log_section() { echo -e "\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"; }
log_subsection() { echo -e "\n${CYAN} $1${NC}"; }

# All containers that failed on r630-02 (but exist on pve2)
ALL_CONTAINERS=(
  3000 3001 3002 3003 3500 3501 5200 6000 6400
  10000 10001 10020 10030 10040 10050 10060 10070 10080
  10090 10091 10092 10100 10101 10120 10130 10150 10151
  10200 10201 10202 10210 10230 10232
)

#######################################
# Run a command on the pve2 host as root over SSH.
# Globals:   PVE2_IP (read), SSH_TIMEOUT (read, optional; default 5 seconds)
# Arguments: the remote command, passed through to ssh
# Returns:   ssh's exit status
#######################################
pve2_ssh() {
  ssh -o ConnectTimeout="${SSH_TIMEOUT:-5}" -o StrictHostKeyChecking=no \
    "root@${PVE2_IP}" "$@"
}

#######################################
# Print the status column of `pct list` for one container.
# Arguments: $1 - VMID
# Outputs:   the status, nothing if the VMID is not listed, or "notfound"
#            if the ssh command itself fails
#######################################
check_container_status() {
  local vmid=$1
  pve2_ssh "pct list 2>/dev/null | awk '\$1 == $vmid {print \$2}'" \
    || echo "notfound"
}

#######################################
# Print the name column of `pct list` for one container.
# `pct list` prints VMID, Status, Lock, Name and leaves Lock blank when no
# lock is set, so the name is taken as the last field; reading field 3 would
# return the lock value for a locked container.
# Arguments: $1 - VMID
# Outputs:   the name, nothing if not listed/unnamed, or "unknown" if the
#            ssh command itself fails
#######################################
get_container_hostname() {
  local vmid=$1
  pve2_ssh "pct list 2>/dev/null | awk '\$1 == $vmid && NF >= 3 {print \$NF}'" \
    || echo "unknown"
}

#######################################
# Remove stale lock files for a container (used for CT 10232), then pause
# briefly before the caller tries to start it.
# NOTE(review): these paths are under /var/lock/qemu-server; `pct unlock`
# is probably the supported way to clear a container lock - confirm.
# Arguments: $1 - VMID
#######################################
clear_lock() {
  local vmid=$1
  log_info " Clearing lock for CT $vmid..."
  # Best effort: a failure to remove the files must not abort the run.
  pve2_ssh "rm -f /var/lock/qemu-server/lock-${vmid} /var/lock/qemu-server/lxc-${vmid} 2>/dev/null" \
    || true
  sleep 2
}

echo ""
log_section
log_info " STARTING CONTAINERS ON $PVE2_NAME"
log_section
echo ""

# Check SSH access
log_info "Checking SSH access to $PVE2_NAME ($PVE2_IP)..."
if ! pve2_ssh "echo 'SSH OK'" &>/dev/null; then
  log_error "Cannot access $PVE2_NAME via SSH"
  exit 1
fi
log_success "SSH access confirmed"

log_section
log_info " PRE-START STATUS CHECK"
log_section

# Check current status (simplified): one remote call listing every stopped CT.
log_info "Checking container status..."
STOPPED_LIST=$(SSH_TIMEOUT=10 pve2_ssh \
  "pct list 2>/dev/null | awk '\$2 == \"stopped\" {print \$1}'" || echo "")

STOPPED_COUNT=0
for vmid in "${ALL_CONTAINERS[@]}"; do
  if grep -qx -- "$vmid" <<<"$STOPPED_LIST"; then
    # Plain assignment instead of ((STOPPED_COUNT++)): the post-increment
    # evaluates to 0 on the first hit, which returns status 1 and would
    # terminate the script under `set -e`.
    STOPPED_COUNT=$((STOPPED_COUNT + 1))
  fi
done

log_info "Found $STOPPED_COUNT stopped container(s) to start"
echo ""

if [[ $STOPPED_COUNT -eq 0 ]]; then
  log_success "✅ All containers are already running!"
  exit 0
fi

log_section
log_info " STARTING CONTAINERS"
log_section

SUCCESS=0
FAILED=0
SKIPPED=0

for vmid in "${ALL_CONTAINERS[@]}"; do
  status=$(check_container_status "$vmid")
  hostname=$(get_container_hostname "$vmid")

  log_subsection "CT $vmid - $hostname"

  if [[ "$status" == "running" ]]; then
    log_success " ✅ Already running"
    SKIPPED=$((SKIPPED + 1))
  elif [[ "$status" == "stopped" ]]; then
    # Special handling for CT 10232 (locked)
    if [[ "$vmid" == "10232" ]]; then
      log_info " Clearing lock first..."
      clear_lock "$vmid"
    fi

    log_info " Starting container..."
    # Output (including any error text) is printed directly, so a failed
    # start is not re-run just to display its message.
    if pve2_ssh "pct start $vmid" 2>&1; then
      log_success " ✅ Started successfully"
      SUCCESS=$((SUCCESS + 1))
      sleep 2
    else
      log_error " ❌ Failed to start"
      FAILED=$((FAILED + 1))
    fi
  else
    log_warn " ⚠️ Container not found on $PVE2_NAME"
    SKIPPED=$((SKIPPED + 1))
  fi
  echo ""
done

# Wait for services to initialize
log_info "Waiting 5 seconds for services to initialize..."
sleep 5

log_section
log_info " FINAL STATUS"
log_section

# Show final status for exactly the containers in ALL_CONTAINERS; the
# alternation is generated from the array so the two cannot drift apart.
VMID_REGEX=$(IFS='|'; printf '%s' "${ALL_CONTAINERS[*]}")
log_info "Final container status on $PVE2_NAME:"
pve2_ssh "pct list 2>/dev/null | grep -E '^[[:space:]]*(${VMID_REGEX})[[:space:]]' | awk '{printf \" VMID %s (%s): %s\\n\", \$1, \$NF, \$2}'" \
  || true

echo ""
log_section
log_info " SUMMARY"
log_section
echo ""

log_info "Results:"
log_info " Successfully started: $SUCCESS"
log_info " Failed to start: $FAILED"
log_info " Skipped (already running/not found): $SKIPPED"
echo ""

if [[ $SUCCESS -gt 0 ]]; then
  log_success "✅ Started $SUCCESS container(s) on $PVE2_NAME"
fi

if [[ $FAILED -gt 0 ]]; then
  log_error "❌ $FAILED container(s) failed to start - check errors above"
fi

echo ""
log_success "Start script complete!"