Files
loc_az_hci/scripts/deploy/deploy-observability.sh
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

198 lines
5.0 KiB
Bash
Executable File

#!/bin/bash
# Deploy Observability Stack (Prometheus + Grafana) on VM 103 using guest-agent IP discovery
#
# Configuration is read from the environment and, when present, from
# "$PROJECT_ROOT/.env" (plain KEY=value lines; every loaded variable is exported).

# Pick up the invoking user's shell environment. This happens before strict
# mode is enabled so an rc file that touches unset variables cannot abort the
# deployment; a missing rc file is simply skipped.
if [[ -f ~/.bashrc ]]; then
  # shellcheck disable=SC1090
  source ~/.bashrc
fi

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

#######################################
# Load KEY=value assignments from a dotenv-style file and export them.
# Full-line comments and lines without '=' are ignored. An inline comment is
# stripped only when the '#' is preceded by whitespace (the same rule the shell
# itself uses), so values that contain '#' such as "abc#123" stay intact.
# Arguments: $1 - path to the env file; a missing file is not an error
# Returns:   0 when the file is absent, otherwise the status of sourcing it
#######################################
load_env_file() {
  local _env_path="$1"
  [[ -f "$_env_path" ]] || return 0
  set -a
  # shellcheck disable=SC1090
  source <(grep -v '^[[:space:]]*#' "$_env_path" \
    | sed 's/[[:space:]][[:space:]]*#.*$//' \
    | grep '=')
  set +a
}

# Load environment variables
load_env_file "$PROJECT_ROOT/.env"

# Colors (kept as literal escape strings; the log helpers interpret them)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Print an informational message on stdout, prefixed with a green "[INFO]" tag.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_info() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1"
}
# Print a warning, prefixed with a yellow "[WARN]" tag, on stderr.
# Warnings are diagnostics: keeping them off stdout means they are never
# captured by a command substitution or mixed into piped output.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1" >&2
}
# Print an error, prefixed with a red "[ERROR]" tag, on stderr.
# Errors belong on stderr so they stay visible when stdout is redirected or
# captured. This function only reports; the caller decides whether to exit.
# Arguments: $1 - message text (backslash escapes in it are interpreted)
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
}
# Target VM: fixed Proxmox VM id and its display name.
VMID=103
VM_NAME="observability"

# SSH access to the VM; both values can be overridden from the environment.
VM_USER="${VM_USER:-ubuntu}"
SSH_KEY="${SSH_KEY:-$HOME/.ssh/id_ed25519_proxmox}"

# Import helper library; without it the script cannot continue.
if [[ ! -f "$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh" ]]; then
  log_error "Helper library not found"
  exit 1
fi
source "$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh"
#######################################
# Deploy Prometheus and Grafana on the observability VM over SSH.
#
# Steps: resolve the VM's IP, install Docker when it is missing, upload
# prometheus.yml and docker-compose.yml to ~/observability on the VM, start
# the stack with docker-compose, then poll both health endpoints.
#
# Globals:   VMID, VM_NAME, VM_USER, SSH_KEY (read)
# Arguments: none
# Outputs:   progress messages via log_info / log_warn / log_error
# Returns:   exits 1 when no IP can be determined; a failing remote step
#            aborts the script when the caller runs under `set -e`. A stack
#            that is not healthy in time only produces a warning.
#
# NOTE(review): host-key checking is disabled and Grafana is started with the
# default admin/admin credentials; acceptable for a lab, not for production.
#######################################
main() {
  log_info "Deploying Observability Stack on VM $VMID ($VM_NAME)"
  echo ""

  # Get IP using guest agent. `|| true` keeps a failing lookup from aborting
  # under `set -e`, so the empty result can be reported with a clear message.
  local ip
  ip="$(get_vm_ip_or_warn "$VMID" "$VM_NAME" || true)"
  if [[ -z "$ip" ]]; then
    log_error "Cannot get IP for VM $VMID. Ensure SSH is working and QEMU Guest Agent is installed."
    exit 1
  fi
  log_info "Using IP: $ip"
  echo ""

  # One place for the ssh invocation so every remote step uses identical options.
  local -a ssh_cmd=(ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no "${VM_USER}@${ip}")

  # Check if Docker is installed
  log_info "Checking Docker installation..."
  if ! "${ssh_cmd[@]}" "command -v docker" </dev/null &>/dev/null; then
    log_warn "Docker not found. Installing Docker..."
    # The remote script arrives on stdin. apt-get gets /dev/null as its own
    # stdin so that it (or a package hook) cannot swallow the remaining lines.
    "${ssh_cmd[@]}" 'bash -s' <<'EOF'
set -e
sudo apt-get update -qq </dev/null
sudo apt-get install -y docker.io docker-compose </dev/null
sudo usermod -aG docker "$USER"
EOF
    log_info "Docker installed. You may need to log out and back in for group changes."
  else
    log_info "Docker is installed"
  fi

  # Create observability directory structure
  log_info "Setting up observability directory..."
  "${ssh_cmd[@]}" 'bash -s' <<'EOF'
set -e
mkdir -p ~/observability/prometheus
cd ~/observability
EOF

  # Create Prometheus config (heredoc body is literal YAML; indentation matters)
  log_info "Creating Prometheus configuration..."
  "${ssh_cmd[@]}" "cat > ~/observability/prometheus/prometheus.yml" <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
EOF

  # Create docker-compose file (heredoc body is literal YAML; indentation matters)
  log_info "Creating docker-compose.yml..."
  "${ssh_cmd[@]}" "cat > ~/observability/docker-compose.yml" <<'EOF'
version: '3.8'
services:
  prometheus:
    image: prom/prometheus:latest
    container_name: prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
    networks:
      - observability
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      - GF_SECURITY_ADMIN_PASSWORD=admin
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_ROOT_URL=http://localhost:3000
    volumes:
      - grafana-data:/var/lib/grafana
    networks:
      - observability
    depends_on:
      - prometheus
volumes:
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
networks:
  observability:
    driver: bridge
EOF

  # Deploy
  log_info "Deploying Observability Stack with Docker Compose..."
  "${ssh_cmd[@]}" 'bash -s' <<'EOF'
set -e
cd ~/observability
sudo docker-compose up -d
EOF

  # Wait for services to be ready
  log_info "Waiting for services to start..."
  sleep 15

  # Verify. --fail turns an HTTP error status (e.g. 503 while a service is
  # still starting) into a non-zero exit, and --max-time bounds each probe so
  # an unreachable port cannot stall the loop.
  log_info "Verifying services..."
  local -a probe=(curl --fail --silent --max-time 5 --output /dev/null)
  local prometheus_ok=false
  local grafana_ok=false
  for _ in {1..12}; do
    if [[ "$prometheus_ok" != true ]] \
      && "${probe[@]}" "http://${ip}:9090/-/healthy" &>/dev/null; then
      prometheus_ok=true
    fi
    if [[ "$grafana_ok" != true ]] \
      && "${probe[@]}" "http://${ip}:3000/api/health" &>/dev/null; then
      grafana_ok=true
    fi
    if [[ "$prometheus_ok" == true && "$grafana_ok" == true ]]; then
      break
    fi
    sleep 5
    printf '.'
  done
  echo ""

  if [[ "$prometheus_ok" == true && "$grafana_ok" == true ]]; then
    log_info "✓ Observability Stack is running!"
    echo ""
    log_info "Access services:"
    log_info " Prometheus: http://${ip}:9090"
    log_info " Grafana: http://${ip}:3000 (admin/admin)"
  else
    log_warn "Some services may not be fully ready. Check logs with:"
    log_info " ssh ${VM_USER}@${ip} 'cd ~/observability && sudo docker-compose logs'"
  fi
}
# Entry point: run the deployment, forwarding all command-line arguments to main.
main "$@"