Files
proxmox/smom-dbis-138-proxmox/scripts/deployment/deploy-validated-set.sh

506 lines
18 KiB
Bash
Executable File

#!/usr/bin/env bash
# Deploy Validated Set - Complete Deployment Orchestrator
# Script-based deployment for Besu validated set (no boot node required)
#
# This script orchestrates the complete deployment of Besu nodes:
# 1. Deploy containers (validators, sentries, RPC nodes)
# 2. Copy configuration files and keys
# 3. Bootstrap network (generate static-nodes.json)
# 4. Validate deployment
#
# Features:
# - Progress tracking with ETA
# - Comprehensive error handling
# - Rollback support
# - Detailed logging
# - Timeout protection
# - Container status validation
# Strict mode: abort on an unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Absolute directory that contains this script (cd + pwd yields an absolute path).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The project root is two directory levels above the script directory.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Source libraries with error handling
# common.sh is mandatory. The log_*, command_exists, error_exit, load_config and
# ensure_os_template helpers used later are not defined in this file, so they
# presumably come from this library (or something it sources) - TODO confirm.
source "$PROJECT_ROOT/lib/common.sh" || {
echo "ERROR: Failed to load common.sh library" >&2
exit 1
}
# Optional: Load progress tracking if available
# USE_ADVANCED_PROGRESS only records that the optional library file was found;
# later code additionally checks that each progress function exists before use.
if [[ -f "$PROJECT_ROOT/lib/progress-tracking.sh" ]]; then
source "$PROJECT_ROOT/lib/progress-tracking.sh"
USE_ADVANCED_PROGRESS=true
else
USE_ADVANCED_PROGRESS=false
fi
# Parse command line arguments
# Every setting can be pre-seeded from the environment; flags given on the
# command line override the environment value.
SKIP_DEPLOYMENT="${SKIP_DEPLOYMENT:-false}"
SKIP_CONFIG="${SKIP_CONFIG:-false}"
SKIP_BOOTSTRAP="${SKIP_BOOTSTRAP:-false}"
SKIP_VALIDATION="${SKIP_VALIDATION:-false}"
SOURCE_PROJECT="${SOURCE_PROJECT:-}"
TIMEOUT_DEPLOYMENT="${TIMEOUT_DEPLOYMENT:-3600}" # 1 hour default
TIMEOUT_CONFIG="${TIMEOUT_CONFIG:-1800}" # 30 minutes default
TIMEOUT_BOOTSTRAP="${TIMEOUT_BOOTSTRAP:-300}" # 5 minutes default

#######################################
# Abort with a usage hint when a value-taking option has no value after it.
# Without this check "$2" would be expanded while unset, which under `set -u`
# kills the script with an unhelpful "unbound variable" message.
# Arguments:
#   $1 - option name as typed by the user
#   $2 - number of arguments still unparsed (including the option itself)
#######################################
require_option_value() {
  local option_name="$1"
  local remaining_args="$2"
  if (( remaining_args < 2 )); then
    log_error "Option $option_name requires a value"
    log_info "Use --help for usage information"
    exit 1
  fi
}

#######################################
# Apply command line flags to the SKIP_*, SOURCE_PROJECT and TIMEOUT_* globals.
# Globals:
#   SKIP_DEPLOYMENT, SKIP_CONFIG, SKIP_BOOTSTRAP, SKIP_VALIDATION,
#   SOURCE_PROJECT, TIMEOUT_DEPLOYMENT, TIMEOUT_CONFIG, TIMEOUT_BOOTSTRAP (written)
# Arguments:
#   the script's command line ("$@")
# Exits:
#   0 after printing --help; 1 on an unknown option or a missing option value
#######################################
parse_deploy_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --skip-deployment)
        SKIP_DEPLOYMENT=true
        shift
        ;;
      --skip-config)
        SKIP_CONFIG=true
        shift
        ;;
      --skip-bootstrap)
        SKIP_BOOTSTRAP=true
        shift
        ;;
      --skip-validation)
        SKIP_VALIDATION=true
        shift
        ;;
      --source-project)
        require_option_value "$1" "$#"
        SOURCE_PROJECT="$2"
        shift 2
        ;;
      --timeout-deployment)
        require_option_value "$1" "$#"
        TIMEOUT_DEPLOYMENT="$2"
        shift 2
        ;;
      --timeout-config)
        require_option_value "$1" "$#"
        TIMEOUT_CONFIG="$2"
        shift 2
        ;;
      --timeout-bootstrap)
        # Documented in --help but previously rejected as an unknown option.
        require_option_value "$1" "$#"
        TIMEOUT_BOOTSTRAP="$2"
        shift 2
        ;;
      --help)
        # Heredoc body and terminator must stay at column 0 (plain <<, not <<-).
        cat << EOF
Usage: $0 [OPTIONS]
Deploy Validated Set - Complete Besu Network Deployment Orchestrator
Options:
--skip-deployment Skip container deployment (assume containers exist)
--skip-config Skip configuration file copying
--skip-bootstrap Skip network bootstrap
--skip-validation Skip validation
--source-project PATH Path to source project with config files
(default: ../smom-dbis-138 relative to project root)
--timeout-deployment SEC Timeout for deployment phase (default: 3600)
--timeout-config SEC Timeout for config phase (default: 1800)
--timeout-bootstrap SEC Timeout for bootstrap phase (default: 300)
--help Show this help message
Examples:
# Full deployment
$0 --source-project /opt/smom-dbis-138
# Skip deployment, only configure
$0 --skip-deployment --source-project /opt/smom-dbis-138
# Only validate existing deployment
$0 --skip-deployment --skip-config --skip-bootstrap
EOF
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        log_info "Use --help for usage information"
        exit 1
        ;;
    esac
  done
}

parse_deploy_args "$@"
# Load configuration
# Both loads are best-effort: a failure is reported as a warning and the run
# continues (the warnings state that defaults are used).
if ! load_config; then
  log_warn "Failed to load main configuration, using defaults"
fi
if ! load_config "$PROJECT_ROOT/config/network.conf"; then
  log_warn "network.conf not found, using defaults"
fi
# Remember when the run began; the final summary reports the total duration.
DEPLOYMENT_START_TIME=$(date +%s)
# Display header
header_rule="========================================="
log_info "$header_rule"
log_info "Deploy Validated Set - Script-Based Approach"
log_info "$header_rule"
log_info "Started: $(date '+%Y-%m-%d %H:%M:%S')"
log_info ""
# One progress step per phase that is not being skipped.
TOTAL_STEPS=0
for skip_flag in "$SKIP_DEPLOYMENT" "$SKIP_CONFIG" "$SKIP_BOOTSTRAP" "$SKIP_VALIDATION"; do
  if [[ "$skip_flag" != "true" ]]; then
    TOTAL_STEPS=$((TOTAL_STEPS + 1))
  fi
done
CURRENT_STEP=0
# Enhanced progress tracking function
#######################################
# Advance the global step counter and report progress for the phase being
# entered.
# Globals:
#   CURRENT_STEP (incremented), TOTAL_STEPS (read),
#   USE_ADVANCED_PROGRESS (read)
# Arguments:
#   $1 - human-readable phase name
#   $2 - optional phase start timestamp; still accepted so existing callers
#        keep working, but it is not used
# Outputs:
#   Hands off to update_progress when the advanced library provides it,
#   otherwise emits one log_info line with percentage and step counter.
#######################################
show_progress() {
  local step_name="$1"
  CURRENT_STEP=$((CURRENT_STEP + 1))
  # Guard the division: with no counted steps report 100% instead of failing
  # with "division by 0".
  local percent=100
  if (( TOTAL_STEPS > 0 )); then
    percent=$((CURRENT_STEP * 100 / TOTAL_STEPS))
  fi
  if [[ "$USE_ADVANCED_PROGRESS" == "true" ]] && command_exists update_progress 2>/dev/null; then
    update_progress "$CURRENT_STEP" "$step_name"
  else
    log_info "Progress: [$percent%] [$CURRENT_STEP/$TOTAL_STEPS] - $step_name"
  fi
}
# Time tracking function
#######################################
# Log how long a phase took, formatted as "<minutes>m <seconds>s".
# Arguments:
#   $1 - phase name used in the log line
#   $2 - phase start time in seconds since the epoch
# Outputs:
#   One log_info line; a log_warn line instead if the clock cannot be read.
# Returns:
#   0 - timing is informational and never fails the deployment
#######################################
track_phase_time() {
  local phase_name="$1"
  local start_time="$2"
  # Declare and assign separately: `local v=$(cmd)` would hide a failing date
  # and a bogus negative duration would be reported.
  local end_time
  end_time=$(date +%s) || {
    log_warn "Could not read current time; duration of $phase_name not reported"
    return 0
  }
  local duration=$((end_time - start_time))
  log_info "$phase_name completed in $((duration / 60))m $((duration % 60))s"
}
# Run command with timeout
#######################################
# Execute a command under the `timeout` utility when it is installed.
# Arguments:
#   $1  - time limit handed to `timeout`
#   $2+ - command and its arguments
# Outputs:
#   A log_error line when the command was stopped by the time limit.
# Returns:
#   The command's exit status; 124 when `timeout` reports a time-out.
#   When `timeout` is not installed the command runs with no limit.
#######################################
run_with_timeout() {
  local limit="$1"
  shift

  # Fallback: no timeout utility, so run the command unrestricted.
  if ! command_exists timeout; then
    "$@"
    return
  fi

  local status=0
  timeout "$limit" "$@" || status=$?
  if (( status == 124 )); then
    log_error "Command timed out after ${limit} seconds: $*"
  fi
  return "$status"
}
# Check prerequisites
# NOTE: everything down to the "Setup logging" step runs before output is
# redirected, so these messages do not pass through the tee into the log file.
log_info "=== Pre-Deployment Validation ==="
# pct must be on PATH; the error text treats its absence as "not a Proxmox host".
if ! command_exists pct; then
error_exit "pct command not found. This script must be run on Proxmox host."
fi
# Refuse to run unless the effective UID is 0.
if [[ $EUID -ne 0 ]]; then
error_exit "This script must be run as root"
fi
# Validate project structure
# A missing scripts/ directory is fatal; a missing config/ directory only warns.
if [[ ! -d "$PROJECT_ROOT/scripts" ]]; then
error_exit "Invalid project structure: scripts directory not found"
fi
if [[ ! -d "$PROJECT_ROOT/config" ]]; then
log_warn "config directory not found, some features may not work"
fi
log_success "Prerequisites checked"
# Ensure OS template exists
# CONTAINER_OS_TEMPLATE may override the template; otherwise the Ubuntu 22.04
# standard template on the "local" storage is used. ensure_os_template is
# defined outside this file - what it does beyond returning success/failure
# is not visible here (TODO confirm whether it downloads the template).
log_info "Checking OS template..."
ensure_os_template "${CONTAINER_OS_TEMPLATE:-local:vztmpl/ubuntu-22.04-standard_22.04-1_amd64.tar.zst}" || {
error_exit "OS template not available. Please download it first: pveam download local ubuntu-22.04-standard_22.04-1_amd64.tar.zst"
}
# Create log directory early
mkdir -p "$PROJECT_ROOT/logs"
# One timestamped log file per run.
LOG_FILE="$PROJECT_ROOT/logs/deploy-validated-set-$(date +%Y%m%d-%H%M%S).log"
log_info "Deployment log: $LOG_FILE"
# Setup logging - preserve stdout/stderr for interactive use, also log to file
# Use exec to redirect, but preserve original descriptors
# fd 3 and fd 4 keep copies of the original stdout/stderr; nothing in the
# visible part of this script reads them back.
exec 3>&1 4>&2
# From here on stdout and stderr each go through their own tee, which appends
# to LOG_FILE and also forwards to the original stream. The tees run as
# process substitutions, i.e. asynchronously to the script.
exec 1> >(tee -a "$LOG_FILE")
exec 2> >(tee -a "$LOG_FILE" >&2)
# Create snapshot helper function
#######################################
# Best-effort snapshot of a container before a change is made to it.
# Does nothing unless both `pct` and the snapshot helper script are present.
# Globals:
#   PROJECT_ROOT (read)
# Arguments:
#   $1 - container VMID
#   $2 - short reason, embedded in the snapshot name as pre-<reason>-<timestamp>
# Outputs:
#   log_info before the attempt; log_warn if the helper script fails
#   (the failure is not propagated as an error).
#######################################
create_snapshot_if_needed() {
  local container_id=$1
  local change_label=$2
  local snapshot_helper="$PROJECT_ROOT/scripts/manage/snapshot-before-change.sh"

  command -v pct >/dev/null 2>&1 || return 0
  [[ -f "$snapshot_helper" ]] || return 0

  log_info "Creating snapshot for container $container_id before $change_label..."
  if ! "$snapshot_helper" "$container_id" "pre-$change_label-$(date +%Y%m%d-%H%M%S)"; then
    log_warn "Snapshot creation failed (continuing anyway)"
  fi
}
# Initialize rollback tracking
# Only enabled when an init_rollback function is available. set_rollback_trap
# is then called without its own existence check - presumably it is defined
# alongside init_rollback (TODO confirm).
if command_exists init_rollback 2>/dev/null; then
ROLLBACK_LOG="$PROJECT_ROOT/logs/rollback-$(date +%Y%m%d-%H%M%S).log"
init_rollback "$ROLLBACK_LOG"
set_rollback_trap
log_info "Rollback tracking enabled: $ROLLBACK_LOG"
fi
# Initialize advanced progress tracking if available
# Requires both the optional library file (USE_ADVANCED_PROGRESS) and the
# init_progress_tracking function itself.
if [[ "$USE_ADVANCED_PROGRESS" == "true" ]] && command_exists init_progress_tracking 2>/dev/null; then
init_progress_tracking "$TOTAL_STEPS" "Deployment"
fi
# Resolve SOURCE_PROJECT path early if provided
# When SOURCE_PROJECT is empty nothing happens here; Phase 2 later tries a
# default location.
if [[ -n "$SOURCE_PROJECT" ]]; then
# Resolve absolute path for SOURCE_PROJECT if relative
# Relative paths are interpreted relative to PROJECT_ROOT, not the caller's
# working directory. If the directory cannot be entered, fall back to plain
# concatenation so the check below reports a meaningful path.
if [[ "$SOURCE_PROJECT" != /* ]]; then
SOURCE_PROJECT="$(cd "$PROJECT_ROOT" && cd "$SOURCE_PROJECT" 2>/dev/null && pwd || echo "$PROJECT_ROOT/$SOURCE_PROJECT")"
fi
# An explicitly provided source project that does not exist is fatal.
if [[ ! -d "$SOURCE_PROJECT" ]]; then
log_error "Source project directory not found: $SOURCE_PROJECT"
exit 1
fi
log_info "Source project: $SOURCE_PROJECT"
fi
log_info ""
# Phase 1: Deploy Containers
# Runs deploy-besu-nodes.sh (located next to this script) under the deployment
# time limit. A missing script, a failure or a time-out aborts the whole run.
if [[ "$SKIP_DEPLOYMENT" == "true" ]]; then
  log_info "Skipping container deployment (--skip-deployment)"
  log_info "Assuming containers already exist"
else
  PHASE_START_1=$(date +%s)
  show_progress "Deploy Containers" "$PHASE_START_1"
  for banner_line in \
    "" \
    "=========================================" \
    "Phase 1: Deploy Containers" \
    "=========================================" \
    "Timeout: ${TIMEOUT_DEPLOYMENT}s" \
    ""; do
    log_info "$banner_line"
  done

  besu_deploy_script="$SCRIPT_DIR/deploy-besu-nodes.sh"
  if [[ ! -f "$besu_deploy_script" ]]; then
    log_error "Besu deployment script not found: $besu_deploy_script"
    exit 1
  fi

  log_info "Deploying Besu nodes..."
  run_with_timeout "$TIMEOUT_DEPLOYMENT" bash "$besu_deploy_script" || {
    log_error "Besu nodes deployment failed or timed out"
    log_error "Check logs for details: $LOG_FILE"
    exit 1
  }
  track_phase_time "Container deployment" "$PHASE_START_1"
  log_success "Besu nodes deployed"
fi
# Phase 2: Copy Configuration Files
# Copies Besu configuration from SOURCE_PROJECT using the enhanced copy script
# when present, falling back to the standard one. A missing source project is
# not fatal: the copy is skipped with a warning and copy_success stays unset.
if [[ "$SKIP_CONFIG" != "true" ]]; then
  PHASE_START_2=$(date +%s)
  show_progress "Copy Configuration Files" "$PHASE_START_2"
  log_info ""
  log_info "========================================="
  log_info "Phase 2: Copy Configuration Files"
  log_info "========================================="
  log_info "Timeout: ${TIMEOUT_CONFIG}s"
  log_info ""
  if [[ -z "$SOURCE_PROJECT" ]]; then
    # Try default location
    DEFAULT_SOURCE="$PROJECT_ROOT/../smom-dbis-138"
    if [[ -d "$DEFAULT_SOURCE" ]]; then
      SOURCE_PROJECT="$DEFAULT_SOURCE"
      log_info "Using default source project: $SOURCE_PROJECT"
    else
      log_warn "Source project not specified and default not found: $DEFAULT_SOURCE"
      log_warn "Skipping configuration copy"
      log_info "Specify with: --source-project PATH"
    fi
  fi
  if [[ -n "$SOURCE_PROJECT" ]] && [[ -d "$SOURCE_PROJECT" ]]; then
    # Run prerequisites check first (only when the checker script exists)
    if [[ -f "$PROJECT_ROOT/scripts/validation/check-prerequisites.sh" ]]; then
      log_info "Checking prerequisites..."
      if ! bash "$PROJECT_ROOT/scripts/validation/check-prerequisites.sh" "$SOURCE_PROJECT"; then
        log_error "Prerequisites check failed"
        log_error "Review the output above and fix issues before continuing"
        exit 1
      fi
      log_success "Prerequisites validated"
    fi
    # Exported so the copy scripts started below can read it from the environment.
    export SOURCE_PROJECT
    copy_success=false
    # Try enhanced copy script first (supports config/nodes/ structure), then fallback
    if [[ -f "$PROJECT_ROOT/scripts/copy-besu-config-with-nodes.sh" ]]; then
      log_info "Copying Besu configuration files (with nodes/ structure support)..."
      if run_with_timeout "$TIMEOUT_CONFIG" bash "$PROJECT_ROOT/scripts/copy-besu-config-with-nodes.sh" "$SOURCE_PROJECT"; then
        copy_success=true
        log_success "Configuration files copied (with nodes/ structure)"
      else
        log_warn "Enhanced copy script failed, trying standard copy script..."
      fi
    fi
    # Fallback to standard copy script; a failure here is fatal.
    if [[ "$copy_success" != "true" ]] && [[ -f "$PROJECT_ROOT/scripts/copy-besu-config.sh" ]]; then
      log_info "Copying Besu configuration files (standard method)..."
      if ! run_with_timeout "$TIMEOUT_CONFIG" bash "$PROJECT_ROOT/scripts/copy-besu-config.sh" "$SOURCE_PROJECT"; then
        log_error "Configuration copy failed"
        log_error "Check logs for details: $LOG_FILE"
        exit 1
      fi
      copy_success=true
      log_success "Configuration files copied"
    fi
    # Neither script succeeded (or neither exists).
    if [[ "$copy_success" != "true" ]]; then
      log_error "No working copy script available"
      log_error "Checked:"
      log_error " - $PROJECT_ROOT/scripts/copy-besu-config-with-nodes.sh"
      log_error " - $PROJECT_ROOT/scripts/copy-besu-config.sh"
      exit 1
    fi
    track_phase_time "Configuration copy" "$PHASE_START_2"
  else
    log_warn "Source project not available, skipping configuration copy"
    log_info "You can copy configuration files manually or run:"
    # SOURCE_PROJECT is empty whenever this branch is reached, so show a
    # placeholder instead of printing a command with a missing argument.
    log_info " $PROJECT_ROOT/scripts/copy-besu-config.sh ${SOURCE_PROJECT:-<source-project-path>}"
  fi
else
  log_info "Skipping configuration copy (--skip-config)"
fi
# Phase 3: Bootstrap Network
# Runs scripts/network/bootstrap-network.sh under the bootstrap time limit.
# A missing script, a failure or a time-out aborts the whole run.
if [[ "$SKIP_BOOTSTRAP" == "true" ]]; then
  log_info "Skipping network bootstrap (--skip-bootstrap)"
else
  PHASE_START_3=$(date +%s)
  show_progress "Bootstrap Network" "$PHASE_START_3"
  for banner_line in \
    "" \
    "=========================================" \
    "Phase 3: Bootstrap Network" \
    "=========================================" \
    "Timeout: ${TIMEOUT_BOOTSTRAP}s" \
    ""; do
    log_info "$banner_line"
  done

  bootstrap_script="$PROJECT_ROOT/scripts/network/bootstrap-network.sh"
  if [[ ! -f "$bootstrap_script" ]]; then
    log_error "Bootstrap script not found: $bootstrap_script"
    exit 1
  fi

  log_info "Bootstrapping network (generating static-nodes.json)..."
  run_with_timeout "$TIMEOUT_BOOTSTRAP" bash "$bootstrap_script" || {
    log_error "Network bootstrap failed or timed out"
    log_error "Check logs for details: $LOG_FILE"
    exit 1
  }
  track_phase_time "Network bootstrap" "$PHASE_START_3"
  log_success "Network bootstrapped"
fi
# Phase 4: Validate Deployment
# Prefers the comprehensive validator, whose failure aborts the run. When it
# is absent, the basic validator-set check runs instead and only warns on
# failure. With neither script present, validation is skipped with a warning.
if [[ "$SKIP_VALIDATION" == "true" ]]; then
  log_info "Skipping validation (--skip-validation)"
else
  PHASE_START_4=$(date +%s)
  show_progress "Validate Deployment" "$PHASE_START_4"
  for banner_line in \
    "" \
    "=========================================" \
    "Phase 4: Validate Deployment" \
    "=========================================" \
    ""; do
    log_info "$banner_line"
  done

  comprehensive_validator="$PROJECT_ROOT/scripts/validation/validate-deployment-comprehensive.sh"
  basic_validator="$PROJECT_ROOT/scripts/validation/validate-validator-set.sh"

  if [[ -f "$comprehensive_validator" ]]; then
    # Comprehensive deployment validation
    log_info "Running comprehensive deployment validation..."
    bash "$comprehensive_validator" || {
      for failure_line in \
        "Comprehensive validation failed (check output above)" \
        "This validation ensures:" \
        " 1. All config files are in correct locations" \
        " 2. Genesis.json is valid and consistent" \
        " 3. Correct number of nodes deployed" \
        " 4. Correct templates used for each node type" \
        " 5. No inconsistencies or gaps"; do
        log_error "$failure_line"
      done
      log_info ""
      log_info "Review validation output and fix issues before proceeding"
      exit 1
    }
    track_phase_time "Deployment validation" "$PHASE_START_4"
    log_success "Comprehensive validation passed"
  else
    log_warn "Comprehensive validation script not found, running basic validation..."
    # Fallback to basic validation
    if [[ ! -f "$basic_validator" ]]; then
      log_warn "Validator validation script not found, skipping validation"
    else
      log_info "Validating validator set..."
      if bash "$basic_validator"; then
        log_success "Validator set validation passed"
      else
        log_warn "Validator set validation had issues (check output above)"
      fi
    fi
  fi
fi
# Calculate total deployment time
DEPLOYMENT_END_TIME=$(date +%s)
TOTAL_DURATION=$((DEPLOYMENT_END_TIME - DEPLOYMENT_START_TIME))
TOTAL_MINUTES=$((TOTAL_DURATION / 60))
TOTAL_SECONDS=$((TOTAL_DURATION % 60))
# Complete progress tracking if using advanced tracking
if [[ "$USE_ADVANCED_PROGRESS" == "true" ]] && command_exists complete_progress 2>/dev/null; then
  complete_progress
fi
# Per-phase status labels for the summary.
if [[ "$SKIP_DEPLOYMENT" != "true" ]]; then
  containers_summary="✓ Deployed"
else
  containers_summary="⊘ Skipped"
fi
# The configuration phase can be entered and still copy nothing (no source
# project available). copy_success is only set to true by Phase 2 after a copy
# script succeeded, so use it rather than SKIP_CONFIG alone to avoid reporting
# a copy that never happened.
if [[ "$SKIP_CONFIG" == "true" ]]; then
  config_summary="⊘ Skipped"
elif [[ "${copy_success:-false}" == "true" ]]; then
  config_summary="✓ Copied"
else
  config_summary="⊘ Skipped (source project not available)"
fi
if [[ "$SKIP_BOOTSTRAP" != "true" ]]; then
  bootstrap_summary="✓ Completed"
else
  bootstrap_summary="⊘ Skipped"
fi
if [[ "$SKIP_VALIDATION" != "true" ]]; then
  validation_summary="✓ Completed"
else
  validation_summary="⊘ Skipped"
fi
# Final Summary
log_info ""
log_info "========================================="
log_success "Deployment Complete!"
log_info "========================================="
log_info "Completed: $(date '+%Y-%m-%d %H:%M:%S')"
log_info "Total duration: ${TOTAL_MINUTES}m ${TOTAL_SECONDS}s"
log_info ""
log_info "Deployment Summary:"
log_info " - Containers: $containers_summary"
log_info " - Configuration: $config_summary"
log_info " - Bootstrap: $bootstrap_summary"
log_info " - Validation: $validation_summary"
log_info ""
# The commands below are printed for the operator to copy; they are not run.
log_info "Next Steps:"
log_info "1. Verify all services are running:"
log_info " for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do"
log_info " pct exec \$vmid -- systemctl status besu-validator besu-sentry besu-rpc 2>/dev/null"
log_info " done"
log_info ""
log_info "2. Start Besu services (if not already started):"
log_info " $PROJECT_ROOT/scripts/fix-besu-services.sh"
log_info ""
log_info "3. Check consensus is active (blocks being produced):"
log_info " pct exec 1000 -- curl -X POST -H 'Content-Type: application/json' \\"
log_info " --data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_blockNumber\",\"params\":[],\"id\":1}' \\"
log_info " http://localhost:8545"
log_info ""
log_info "4. Monitor logs for any issues:"
log_info " pct exec 1000 -- journalctl -u besu-validator -f"
log_info ""
log_info "Deployment log: $LOG_FILE"
# ROLLBACK_LOG exists only when rollback tracking was initialised earlier.
if [[ -n "${ROLLBACK_LOG:-}" ]]; then
  log_info "Rollback log: $ROLLBACK_LOG"
fi
log_info ""
exit 0