Files
Sankofa/scripts/setup-monitoring.sh
defiQUG 9daf1fd378 Apply Composer changes: comprehensive API updates, migrations, middleware, and infrastructure improvements
- Add comprehensive database migrations (001-024) for schema evolution
- Enhance API schema with expanded type definitions and resolvers
- Add new middleware: audit logging, rate limiting, MFA enforcement, security, tenant auth
- Implement new services: AI optimization, billing, blockchain, compliance, marketplace
- Add adapter layer for cloud integrations (Cloudflare, Kubernetes, Proxmox, storage)
- Update Crossplane provider with enhanced VM management capabilities
- Add comprehensive test suite for API endpoints and services
- Update frontend components with improved GraphQL subscriptions and real-time updates
- Enhance security configurations and headers (CSP, CORS, etc.)
- Update documentation and configuration files
- Add new CI/CD workflows and validation scripts
- Implement design system improvements and UI enhancements
2025-12-12 18:01:35 -08:00

222 lines
5.6 KiB
Bash
Executable File

#!/bin/bash
#
# setup-monitoring.sh
# Sets up Prometheus scraping and Grafana dashboards for Proxmox

# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure anywhere inside a pipeline.
set -euo pipefail

# ANSI colour sequences. They are stored as literal backslash text and only
# turned into real escape bytes by the logging helpers at print time.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / "no colour"

# Configuration. Each value may be supplied through the environment; the
# default on the right is used when the variable is unset or empty.
: "${PROMETHEUS_NAMESPACE:=monitoring}"
: "${GRAFANA_NAMESPACE:=monitoring}"
: "${DASHBOARD_DIR:=./infrastructure/monitoring/dashboards}"
# Prints a timestamped progress message to stdout.
# Globals:   GREEN, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim (optional, defaults to empty)
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" with a coloured timestamp
log() {
  local timestamp
  timestamp=$(date +'%Y-%m-%d %H:%M:%S')
  # %b expands the colour escapes only; %s keeps the message literal so that
  # backslashes in caller-supplied text are not reinterpreted as escapes.
  printf '%b[%s]%b %s\n' "${GREEN}" "${timestamp}" "${NC}" "${1:-}"
}
# Prints an error message to stderr and terminates the script.
# Globals:   RED, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim (optional, defaults to empty)
# Outputs:   "[ERROR] <message>" on stderr
# Returns:   does not return; exits the current shell with status 1
error() {
  # %b expands the colour escapes only; %s keeps the message literal so that
  # backslashes in caller-supplied text are not reinterpreted as escapes.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "${1:-}" >&2
  exit 1
}
# Prints a warning message to stdout (the script continues afterwards).
# Globals:   YELLOW, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim (optional, defaults to empty)
# Outputs:   "[WARN] <message>" with a coloured tag
warn() {
  # %b expands the colour escapes only; %s keeps the message literal so that
  # backslashes in caller-supplied text (e.g. a path) are not reinterpreted.
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "${1:-}"
}
# Prints an informational message to stdout.
# Globals:   BLUE, NC (read) - colour escape sequences
# Arguments: $1 - message text, printed verbatim (optional, defaults to empty)
# Outputs:   "[INFO] <message>" with a coloured tag
info() {
  # %b expands the colour escapes only; %s keeps the message literal so that
  # backslashes in caller-supplied text are not reinterpreted as escapes.
  printf '%b[INFO]%b %s\n' "${BLUE}" "${NC}" "${1:-}"
}
# Verifies that the script can talk to a Kubernetes cluster.
# Outputs: progress messages via log
# Returns: 0 when kubectl is available and `kubectl cluster-info` succeeds;
#          otherwise terminates through error
check_prerequisites() {
  log "Checking prerequisites..."

  # Guard 1: the kubectl command must be resolvable.
  command -v kubectl > /dev/null 2>&1 \
    || error "kubectl is required but not installed"

  # Guard 2: the current kubectl context must reach a cluster.
  kubectl cluster-info > /dev/null 2>&1 \
    || error "Cannot connect to Kubernetes cluster"

  log "✓ Prerequisites check passed"
}
# Applies a Prometheus Operator ServiceMonitor named "proxmox-exporters".
# The manifest selects objects labelled app=proxmox-exporter and declares one
# endpoint: port "metrics", path /metrics, scheme http, interval 30s.
# Globals:   PROMETHEUS_NAMESPACE (read) - namespace the object is created in
# Outputs:   progress messages via log, plus whatever kubectl prints
# Returns:   0 on success; terminates through error if kubectl apply fails
create_prometheus_service_monitor() {
  log "Creating Prometheus ServiceMonitor for Proxmox exporters..."

  # The delimiter is unquoted on purpose so ${PROMETHEUS_NAMESPACE} expands.
  # The manifest starts at column 0 because YAML nesting is significant: every
  # child key must be indented beneath its parent or the object is invalid.
  kubectl apply -f - <<EOF || error "Failed to apply ServiceMonitor proxmox-exporters"
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: proxmox-exporters
  namespace: ${PROMETHEUS_NAMESPACE}
  labels:
    app: proxmox
spec:
  selector:
    matchLabels:
      app: proxmox-exporter
  endpoints:
  - port: metrics
    interval: 30s
    path: /metrics
    scheme: http
EOF

  log "✓ ServiceMonitor created"
}
# Prints a static Prometheus scrape job for the Proxmox metric endpoints.
# Nothing is applied to the cluster: the snippet is only shown so it can be
# added to the Prometheus configuration by hand.
# Outputs: progress/hint messages via log and info, then the YAML snippet on
#          stdout
create_prometheus_scrape_config() {
  log "Creating Prometheus scrape configuration..."

  # This would be added to Prometheus ConfigMap
  info "Add the following to your Prometheus configuration:"

  # Quoted delimiter: the snippet contains no shell expansions and must be
  # emitted literally. Indentation is part of the YAML structure.
  cat <<'EOF'
- job_name: 'proxmox'
  scrape_interval: 30s
  static_configs:
    - targets:
        - 'ml110-01-metrics.sankofa.nexus:9221'
        - 'r630-01-metrics.sankofa.nexus:9221'
      labels:
        cluster: 'proxmox'
EOF
}
# Reports which of the expected Proxmox dashboard JSON files are present and
# prints manual import instructions. No dashboard is uploaded by this
# function; it only checks for files and prints guidance.
# Globals:   DASHBOARD_DIR (read)     - directory holding the dashboard files
#            GRAFANA_NAMESPACE (read) - namespace used in the printed command
# Outputs:   messages via log, info and warn
# Returns:   0 in all cases, including when DASHBOARD_DIR does not exist
import_grafana_dashboards() {
  local -a dashboards=(
    "proxmox-cluster.json"
    "proxmox-vms.json"
    "proxmox-node.json"
  )
  # Declared local so the loop variables do not leak into the global scope.
  local dashboard dashboard_file
  local found=0

  log "Importing Grafana dashboards..."

  if [[ ! -d "$DASHBOARD_DIR" ]]; then
    warn "Dashboard directory not found: ${DASHBOARD_DIR}"
    return 0
  fi

  for dashboard in "${dashboards[@]}"; do
    dashboard_file="${DASHBOARD_DIR}/${dashboard}"
    if [[ -f "$dashboard_file" ]]; then
      info "Dashboard file found: ${dashboard}"
      found=$((found + 1))
    else
      warn "Dashboard file not found: ${dashboard_file}"
    fi
  done

  # The import instructions are the same for every dashboard, so print them
  # once, and only when there is at least one file to import.
  if ((found > 0)); then
    info "Import via Grafana UI or API:"
    info " kubectl port-forward -n ${GRAFANA_NAMESPACE} svc/grafana 3000:80"
    info " Then import: http://localhost:3000/dashboard/import"
  fi
}
# Prints the settings to use for a Prometheus datasource in Grafana.
# Nothing is created: the function only emits guidance for manual setup.
# Globals: PROMETHEUS_NAMESPACE (read) - used to build the in-cluster URL
# Outputs: one log line followed by a series of info lines
create_grafana_datasource() {
  local hint
  local -a hints=(
    "Prometheus datasource should be configured in Grafana:"
    " URL: http://prometheus.${PROMETHEUS_NAMESPACE}.svc.cluster.local:9090"
    " Access: Server (default)"
    ""
    "Configure via Grafana UI or API"
  )

  log "Creating Grafana datasource configuration..."

  # Emit each hint through info, in order; the empty entry is a spacer line.
  for hint in "${hints[@]}"; do
    info "$hint"
  done
}
# Applies a Prometheus Operator PrometheusRule named "proxmox-alerts" holding
# one rule group ("proxmox", 30s interval) with four alerts:
#   ProxmoxNodeDown    - up{job="proxmox"} == 0 for 5m        (critical)
#   ProxmoxHighCPU     - pve_node_cpu_usage > 90 for 10m      (warning)
#   ProxmoxHighMemory  - pve_node_memory_usage > 90 for 10m   (warning)
#   ProxmoxStorageFull - pve_storage_usage > 90 for 5m        (critical)
# Globals:   PROMETHEUS_NAMESPACE (read) - namespace the object is created in
# Outputs:   progress messages via log, plus whatever kubectl prints
# Returns:   0 on success; terminates through error if kubectl apply fails
#
# NOTE(review): the pve_* metric names and the job="proxmox" label are taken
# as given; confirm they match what the deployed exporter and scrape job
# actually expose, otherwise the alerts can never fire.
create_alerts() {
  log "Creating Prometheus alert rules..."

  # The delimiter is unquoted so ${PROMETHEUS_NAMESPACE} expands; the template
  # variables are written as \$labels / \$value so the shell passes a literal
  # "$" through to Prometheus. The manifest starts at column 0 because YAML
  # nesting is significant.
  kubectl apply -f - <<EOF || error "Failed to apply PrometheusRule proxmox-alerts"
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: proxmox-alerts
  namespace: ${PROMETHEUS_NAMESPACE}
  labels:
    app: proxmox
spec:
  groups:
  - name: proxmox
    interval: 30s
    rules:
    - alert: ProxmoxNodeDown
      expr: up{job="proxmox"} == 0
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "Proxmox node is down"
        description: "Proxmox node {{ \$labels.instance }} has been down for more than 5 minutes"
    - alert: ProxmoxHighCPU
      expr: pve_node_cpu_usage > 90
      for: 10m
      labels:
        severity: warning
      annotations:
        summary: "Proxmox node CPU usage is high"
        description: "Node {{ \$labels.node }} CPU usage is {{ \$value }}%"
    - alert: ProxmoxHighMemory
      expr: pve_node_memory_usage > 90
      for: 10m
      labels:
        severity: warning
      annotations:
        summary: "Proxmox node memory usage is high"
        description: "Node {{ \$labels.node }} memory usage is {{ \$value }}%"
    - alert: ProxmoxStorageFull
      expr: pve_storage_usage > 90
      for: 5m
      labels:
        severity: critical
      annotations:
        summary: "Proxmox storage is nearly full"
        description: "Storage {{ \$labels.storage }} on node {{ \$labels.node }} is {{ \$value }}% full"
EOF

  log "✓ Alert rules created"
}
# Entry point: prints a banner, runs every setup step in order with a blank
# line after each, then prints follow-up instructions.
# Globals:   PROMETHEUS_NAMESPACE (read) - used in the printed verify command
# Arguments: none are read
# Outputs:   banner and spacing on stdout; everything else via the steps,
#            log and info
main() {
  local banner_title="Proxmox Monitoring Setup"
  local banner_width=62 # number of cells between the two vertical borders
  local banner_rule pad_left pad_right step
  local -a steps=(
    check_prerequisites
    create_prometheus_service_monitor
    create_prometheus_scrape_config
    create_alerts
    import_grafana_dashboards
    create_grafana_datasource
  )

  # Build the horizontal rule and the title padding from one width so the
  # three banner rows always line up.
  printf -v banner_rule '%*s' "$banner_width" ''
  banner_rule=${banner_rule// /═}
  pad_left=$(((banner_width - ${#banner_title}) / 2))
  pad_right=$((banner_width - ${#banner_title} - pad_left))

  echo ""
  printf '╔%s╗\n' "$banner_rule"
  printf '║%*s%s%*s║\n' "$pad_left" '' "$banner_title" "$pad_right" ''
  printf '╚%s╝\n' "$banner_rule"
  echo ""

  for step in "${steps[@]}"; do
    "$step"
    echo ""
  done

  log "Monitoring setup complete!"
  echo ""
  info "Next steps:"
  info "1. Verify Prometheus is scraping: kubectl port-forward -n ${PROMETHEUS_NAMESPACE} svc/prometheus 9090:9090"
  info "2. Import Grafana dashboards via UI"
  info "3. Configure alert notifications"
  info "4. Verify metrics are being collected"
}
# Script entry point: run main, forwarding the command-line arguments unchanged.
main "$@"