Files
proxmox/smom-dbis-138-proxmox/scripts/network/bootstrap-network.sh

338 lines
11 KiB
Bash
Executable File

#!/usr/bin/env bash
# Network Bootstrap Script for Besu Validated Set
# Orchestrates network bootstrap using script-based approach (static-nodes.json)
set -euo pipefail

# Resolve this script's directory and the project root two levels above it,
# then source the project's common library from there.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
source "$PROJECT_ROOT/lib/common.sh"

# Load configuration: once with no argument, then network.conf
# (a failure to load network.conf is tolerated).
load_config
load_config "$PROJECT_ROOT/config/network.conf" || true

# VMID ranges. Each tier is described by a start VMID and a node count taken
# from the configuration, falling back to the defaults below; the end VMID is
# derived as start + count - 1.
VALIDATORS_START="${VALIDATOR_START:-1000}"
VALIDATORS_COUNT="${VALIDATOR_COUNT:-${VALIDATORS_COUNT:-5}}"
VALIDATORS_END=$((VALIDATORS_START + VALIDATORS_COUNT - 1))

SENTRIES_START="${SENTRY_START:-1500}"
SENTRIES_COUNT="${SENTRY_COUNT:-${SENTRIES_COUNT:-4}}"
SENTRIES_END=$((SENTRIES_START + SENTRIES_COUNT - 1))

RPC_START="${RPC_START:-2500}"
RPC_COUNT="${RPC_COUNT:-3}"
RPC_END=$((RPC_START + RPC_COUNT - 1))

# Per-tier VMID lists, plus ALL_BESU holding every VMID in the order
# validators, sentries, RPC nodes.
VALIDATORS=()
SENTRIES=()
RPC_NODES=()
ALL_BESU=()

vmid=$VALIDATORS_START
while ((vmid <= VALIDATORS_END)); do
  VALIDATORS+=("$vmid")
  ALL_BESU+=("$vmid")
  vmid=$((vmid + 1))
done

vmid=$SENTRIES_START
while ((vmid <= SENTRIES_END)); do
  SENTRIES+=("$vmid")
  ALL_BESU+=("$vmid")
  vmid=$((vmid + 1))
done

vmid=$RPC_START
while ((vmid <= RPC_END)); do
  RPC_NODES+=("$vmid")
  ALL_BESU+=("$vmid")
  vmid=$((vmid + 1))
done

# Startup banner summarizing the computed topology.
log_info "========================================="
log_info "Network Bootstrap - Script-Based Approach"
log_info "========================================="
log_info ""
log_info "Validators: ${#VALIDATORS[@]} (${VALIDATORS_START}-${VALIDATORS_END})"
log_info "Sentries: ${#SENTRIES[@]} (${SENTRIES_START}-${SENTRIES_END})"
log_info "RPC Nodes: ${#RPC_NODES[@]} (${RPC_START}-${RPC_END})"
log_info "Total: ${#ALL_BESU[@]} nodes"
log_info ""
# Print the first address reported by `hostname -I` inside a container.
# Arguments: $1 - container VMID
# Outputs:   the first whitespace-separated field of `hostname -I`, or an
#            empty line when the container is not running or the lookup fails
get_container_ip() {
  local vmid=$1

  # Containers that are not reported as running yield an empty result.
  if ! pct status "$vmid" 2>/dev/null | grep -q running; then
    echo ""
    return
  fi

  pct exec "$vmid" -- hostname -I 2>/dev/null | awk '{print $1}' || echo ""
}
# Poll a container until its Besu node looks ready.
# A node counts as ready when the container is running, a process matching
# "besu" exists, and something is listening on ":30303" according to
# netstat (or ss when the netstat check does not match).
# Arguments: $1 - container VMID
#            $2 - maximum wait in seconds (default 60)
# Returns:   0 once ready, 1 if the wait budget is used up
check_node_ready() {
  local vmid=$1
  local max_wait=${2:-60}
  local elapsed=0

  log_info "Waiting for node $vmid to be ready (max ${max_wait}s)..."

  while [[ $elapsed -lt $max_wait ]]; do
    # Container up and a Besu process present?
    if pct status "$vmid" 2>/dev/null | grep -q running \
      && pct exec "$vmid" -- pgrep -f "besu" >/dev/null 2>&1; then
      # P2P port (30303) listening?
      if pct exec "$vmid" -- netstat -tuln 2>/dev/null | grep -q ":30303" \
        || pct exec "$vmid" -- ss -tuln 2>/dev/null | grep -q ":30303"; then
        log_success "Node $vmid is ready"
        return 0
      fi
    fi

    sleep 2
    elapsed=$((elapsed + 2))

    # Progress note every 10 seconds of waiting.
    if ((elapsed % 10 == 0)); then
      log_info "Still waiting... (${elapsed}s elapsed)"
    fi
  done

  log_warn "Node $vmid not ready after ${max_wait}s (may still be starting)"
  return 1
}
# Normalize a raw public-key string to the 128 hex characters used as the
# node ID in an enode URL: strips all whitespace and an optional leading
# "0x"/"0X", then validates the remainder.
# Arguments: $1 - raw key text
# Outputs:   the normalized key on stdout
# Returns:   0 when the key is valid, 1 otherwise (nothing is printed)
_normalize_node_pubkey() {
  local key=$1
  key=${key//[[:space:]]/}
  key=${key#0x}
  key=${key#0X}
  [[ "$key" =~ ^[0-9a-fA-F]{128}$ ]] || return 1
  printf '%s\n' "$key"
}

# Function to extract enode from node
#
# Tries, in order:
#   1. the admin_nodeInfo JSON-RPC call on localhost:8545 inside the container,
#      rewriting the host part of the returned enode to the given IP;
#   2. `besu public-key export` against /data/besu/nodekey;
#   3. the contents of /data/besu/nodekey.pub.
# Arguments: $1 - container VMID
#            $2 - IP address to place in the enode URL
# Outputs:   the enode URL on stdout (and nothing else, so callers can
#            capture it with command substitution)
# Returns:   0 when an enode was printed, 1 otherwise
extract_enode() {
  local vmid=$1
  local ip=$2
  local p2p_port=30303

  # Try RPC method first (if RPC is enabled)
  local enode_rpc
  enode_rpc=$(pct exec "$vmid" -- curl -s -X POST \
    -H "Content-Type: application/json" \
    -d '{"jsonrpc":"2.0","method":"admin_nodeInfo","params":[],"id":1}' \
    http://localhost:8545 2>/dev/null \
    | python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('result', {}).get('enode', ''))" 2>/dev/null \
    || true)

  # Only accept something shaped like enode://<id>@<host>:<port...>; values
  # such as an empty string or "None" fall through to the nodekey fallback.
  if [[ "$enode_rpc" == enode://*@*:* ]]; then
    local node_id=${enode_rpc%%@*} # "enode://<pubkey>"
    local host_port=${enode_rpc#*@} # "<host>:<port>[?...]"
    # Replace the advertised host with the container's actual IP.
    echo "${node_id}@${ip}:${host_port#*:}"
    return 0
  fi

  # Fallback: Extract from nodekey using Besu public-key export
  local nodekey_path="/data/besu/nodekey"
  local raw_key node_pubkey
  if pct exec "$vmid" -- test -f "$nodekey_path" 2>/dev/null; then
    # Single-quoted so the inner script reaches the container's bash verbatim.
    raw_key=$(pct exec "$vmid" -- bash -c \
      'cd /data/besu && /opt/besu/bin/besu public-key export --node-private-key-file=nodekey 2>/dev/null | tail -1' \
      || true)
    if node_pubkey=$(_normalize_node_pubkey "$raw_key"); then
      echo "enode://${node_pubkey}@${ip}:${p2p_port}"
      return 0
    fi

    # Alternative: Try reading from nodekey.pub if it exists
    if pct exec "$vmid" -- test -f "${nodekey_path}.pub" 2>/dev/null; then
      raw_key=$(pct exec "$vmid" -- cat "${nodekey_path}.pub" 2>/dev/null || true)
      if node_pubkey=$(_normalize_node_pubkey "$raw_key"); then
        echo "enode://${node_pubkey}@${ip}:${p2p_port}"
        return 0
      fi
    fi
  fi

  # Sent to stderr so the warning can never end up in a captured enode value
  # (in case log_warn writes to stdout).
  log_warn "Could not extract enode for node $vmid" >&2
  return 1
}
# Step 1: Collect enodes from all validator nodes
# Fills ENODE_MAP (VMID -> enode) and VALIDATOR_ENODES (ordered list) from
# every validator container that is running, has an IP and becomes ready
# within 30 seconds. Aborts via error_exit when no enode could be collected.
log_info "=== Step 1: Collecting Enodes from Validators ==="
declare -A ENODE_MAP
VALIDATOR_ENODES=()
for vmid in "${VALIDATORS[@]}"; do
  if ! pct status "$vmid" 2>/dev/null | grep -q running; then
    log_warn "Container $vmid is not running, skipping"
    continue
  fi

  log_info "Collecting enode from validator $vmid..."
  ip=$(get_container_ip "$vmid") || ip=""
  if [[ -z "$ip" ]]; then
    log_warn "Could not get IP for container $vmid"
    continue
  fi

  if check_node_ready "$vmid" 30; then
    # extract_enode returns non-zero when it finds nothing. Under `set -e` a
    # bare assignment would abort the whole script at this point, so the
    # failure is absorbed here and reported through the warning below.
    enode=$(extract_enode "$vmid" "$ip") || enode=""
    if [[ -n "$enode" ]]; then
      ENODE_MAP[$vmid]=$enode
      VALIDATOR_ENODES+=("$enode")
      log_success "Validator $vmid: $enode"
    else
      log_warn "Could not extract enode from validator $vmid"
    fi
  fi
done

if [[ ${#VALIDATOR_ENODES[@]} -eq 0 ]]; then
  error_exit "No validator enodes collected. Ensure validators are running and ready."
fi
log_success "Collected ${#VALIDATOR_ENODES[@]} validator enodes"
# Step 2: Generate static-nodes.json (validators only for QBFT)
# Writes a JSON array of the collected validator enodes to a temporary file
# whose path is kept in STATIC_NODES_JSON for the deployment step.
log_info ""
log_info "=== Step 2: Generating static-nodes.json ==="
# mktemp picks an unpredictable name and creates the file atomically, unlike
# a fixed /tmp/<name>-$$ path that another local user could pre-create.
STATIC_NODES_JSON=$(mktemp /tmp/static-nodes.XXXXXX) \
  || error_exit "Could not create temporary file for static-nodes.json"
# mktemp creates the file with mode 0600; the previous redirect-created file
# was world-readable under a typical 022 umask, so keep it readable.
chmod 0644 "$STATIC_NODES_JSON"
# One quoted enode per line, comma-separated; sed drops the comma that
# printf leaves after the last entry.
cat > "$STATIC_NODES_JSON" <<EOF
[
$(printf ' "%s",\n' "${VALIDATOR_ENODES[@]}" | sed '$s/,$//')
]
EOF
log_success "Generated static-nodes.json with ${#VALIDATOR_ENODES[@]} validators"
# Step 3: Deploy static-nodes.json to all nodes
# Pushes the generated file to /etc/besu/static-nodes.json in every running
# Besu container and hands ownership to besu:besu (ownership change is
# best-effort). Push failures are logged and do not stop the run.
log_info ""
log_info "=== Step 3: Deploying static-nodes.json to All Nodes ==="
for vmid in "${ALL_BESU[@]}"; do
  if pct status "$vmid" 2>/dev/null | grep -q running; then
    log_info "Deploying static-nodes.json to container $vmid..."
    if pct push "$vmid" "$STATIC_NODES_JSON" /etc/besu/static-nodes.json >/dev/null 2>&1; then
      pct exec "$vmid" -- chown besu:besu /etc/besu/static-nodes.json 2>/dev/null || true
      log_success "Deployed to container $vmid"
    else
      log_warn "Failed to deploy to container $vmid"
    fi
  else
    log_warn "Container $vmid is not running, skipping"
  fi
done

# Step 4: Restart services in correct order (sentries -> validators -> RPC)
log_info ""
log_info "=== Step 4: Restarting Services in Correct Order ==="
# Restart the Besu systemd unit inside a container and wait for the node.
# Arguments: $1 - container VMID
#            $2 - service type: validator, sentry or rpc
#                 (mapped to the unit besu-<type>)
# Returns:   0 when the restart succeeded and the node became ready within
#            60 seconds; 1 for an unknown type, a failed restart, or a node
#            that did not become ready
restart_besu_service() {
  local vmid=$1
  local service_type=$2
  local service_name=""

  case "$service_type" in
    validator | sentry | rpc)
      service_name="besu-${service_type}"
      ;;
    *)
      log_warn "Unknown service type: $service_type"
      return 1
      ;;
  esac

  log_info "Restarting $service_name on container $vmid..."

  if ! pct exec "$vmid" -- systemctl restart "$service_name" 2>/dev/null; then
    log_warn "Failed to restart service on container $vmid"
    return 1
  fi

  # Short pause before polling for readiness.
  sleep 3

  if ! check_node_ready "$vmid" 60; then
    log_warn "Service restarted but not fully ready on container $vmid"
    return 1
  fi

  log_success "Service restarted and ready on container $vmid"
  return 0
}
# Restart each tier in turn: sentries, then validators, then RPC nodes.
# Containers that are not running are skipped, and a failed restart of one
# node does not stop the sequence.

# Sentries go first.
log_info "Restarting sentries..."
for vmid in "${SENTRIES[@]}"; do
  pct status "$vmid" 2>/dev/null | grep -q running || continue
  restart_besu_service "$vmid" "sentry" || true
done

# Give the sentries a moment to stabilize.
sleep 5

# Validators next.
log_info "Restarting validators..."
for vmid in "${VALIDATORS[@]}"; do
  pct status "$vmid" 2>/dev/null | grep -q running || continue
  restart_besu_service "$vmid" "validator" || true
done

# Give the validators a moment to connect.
sleep 5

# RPC nodes last.
log_info "Restarting RPC nodes..."
for vmid in "${RPC_NODES[@]}"; do
  pct status "$vmid" 2>/dev/null | grep -q running || continue
  restart_besu_service "$vmid" "rpc" || true
done
# Step 5: Verify peer connections
# Asks every running Besu container for its peer list through the
# admin_peers JSON-RPC call on localhost:8545 and counts, in
# VERIFICATION_FAILED, the containers that report no peers (or whose query
# fails, which is treated as zero peers).
log_info ""
log_info "=== Step 5: Verifying Peer Connections ==="
sleep 10 # Let the nodes establish connections before asking
VERIFICATION_FAILED=0
for vmid in "${ALL_BESU[@]}"; do
  # Only running containers are checked.
  pct status "$vmid" 2>/dev/null | grep -q running || continue

  # Peer count via RPC (if enabled); any failure yields "0".
  peer_count=$(pct exec "$vmid" -- curl -s -X POST \
    -H "Content-Type: application/json" \
    -d '{"jsonrpc":"2.0","method":"admin_peers","params":[],"id":1}' \
    http://localhost:8545 2>/dev/null \
    | python3 -c "import sys, json; data=json.load(sys.stdin); peers=data.get('result', []); print(len(peers) if isinstance(peers, list) else 0)" 2>/dev/null || echo "0")

  if [[ -z "$peer_count" || "$peer_count" == "0" ]]; then
    log_warn "Container $vmid: No peers detected (may still be connecting)"
    VERIFICATION_FAILED=$((VERIFICATION_FAILED + 1))
  else
    log_success "Container $vmid: $peer_count peer(s) connected"
  fi
done
# Cleanup: remove the temporary static-nodes file, then print the summary.
# The script exits 0 in both outcomes; nodes without peers only produce
# warnings.
rm -f "$STATIC_NODES_JSON"
log_info ""

if ((VERIFICATION_FAILED != 0)); then
  log_warn "========================================="
  log_warn "Network Bootstrap Complete with Warnings"
  log_warn "========================================="
  log_warn "$VERIFICATION_FAILED node(s) may not have peers connected yet"
  log_info "This is normal if nodes are still starting up"
  log_info "Wait a few minutes and check again"
  exit 0
fi

log_success "========================================="
log_success "Network Bootstrap Complete!"
log_success "========================================="
log_info ""
log_info "Next steps:"
log_info "1. Verify all services are running: systemctl status besu-*"
log_info "2. Check consensus is active (blocks being produced)"
log_info "3. Validate validator set participation"
exit 0