#!/bin/bash
# install-ceph.sh
# Installs and configures Ceph on Proxmox nodes.
#
# Usage: ./install-ceph.sh
#
# Optional environment (may also be provided by ../.env next to this script):
#   CEPH_VERSION         Ceph release name used in repository URLs (default: quincy)
#   DEPLOYMENT_NODE      Address of the node on which ceph-deploy commands are run
#   DEPLOYMENT_HOSTNAME  Kept for .env compatibility; not referenced by this script
#   SSH_KEY              Private key used for root SSH access to every node
#   OSD_DISK             Block device turned into an OSD on every node (default: /dev/sdb)

set -euo pipefail

# Load environment variables
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ -f "${SCRIPT_DIR}/../.env" ]]; then
  # shellcheck disable=SC1091
  source "${SCRIPT_DIR}/../.env"
fi

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Configuration
CEPH_VERSION="${CEPH_VERSION:-quincy}"
DEPLOYMENT_NODE="${DEPLOYMENT_NODE:-192.168.11.10}"
DEPLOYMENT_HOSTNAME="${DEPLOYMENT_HOSTNAME:-ml110-01}"
NODES=("192.168.11.10" "192.168.11.11")
NODE_HOSTNAMES=("ml110-01" "r630-01")
# A tilde is not expanded inside double quotes, so the default uses ${HOME}.
SSH_KEY="${SSH_KEY:-${HOME}/.ssh/sankofa_proxmox}"
OSD_DISK="${OSD_DISK:-/dev/sdb}"

# Options shared by every SSH invocation.
# NOTE(review): StrictHostKeyChecking=no skips host key verification; confirm
# this is acceptable on the management network.
SSH_OPTS=(-i "${SSH_KEY}" -o StrictHostKeyChecking=no)

#######################################
# Print a timestamped progress message to stdout.
# Arguments: $1 - message (backslash escapes are interpreted)
#######################################
log() {
  printf '%b\n' "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

#######################################
# Print an error message to stderr and terminate the script with status 1.
# Arguments: $1 - message
#######################################
error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1" >&2
  exit 1
}

#######################################
# Print a warning message to stdout.
# Arguments: $1 - message
#######################################
warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1"
}

#######################################
# Print an informational message to stdout.
# Arguments: $1 - message
#######################################
info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}

#######################################
# Run ssh as root against a node with the shared options.
# With no command, the remote login shell executes whatever is on stdin,
# which is how the here-documents below are delivered.
# Globals:   SSH_OPTS (read)
# Arguments: $1 - node address; remaining args - optional remote command
#######################################
ssh_root() {
  local host=$1
  shift
  ssh "${SSH_OPTS[@]}" "root@${host}" "$@"
}

#######################################
# Emit shell commands that add every cluster node to /etc/hosts when the
# "<ip> <hostname>" pair is not already present.
# Globals: NODES, NODE_HOSTNAMES (read)
# Outputs: one command line per node on stdout
#######################################
hosts_update_commands() {
  local i entry
  for i in "${!NODES[@]}"; do
    entry="${NODES[$i]} ${NODE_HOSTNAMES[$i]}"
    printf 'grep -qF -- "%s" /etc/hosts || echo "%s %s.sankofa.nexus" >> /etc/hosts\n' \
      "${entry}" "${entry}" "${NODE_HOSTNAMES[$i]}"
  done
}

#######################################
# Verify the node lists are consistent and that every node accepts root SSH.
# Exits through error() on the first failure.
#######################################
check_requirements() {
  log "Checking requirements..."

  # Both arrays are indexed in parallel; a mismatch would otherwise surface
  # later as an unbound-variable failure in the middle of the installation.
  if ((${#NODES[@]} != ${#NODE_HOSTNAMES[@]})); then
    error "NODES and NODE_HOSTNAMES must contain the same number of entries"
  fi

  # Check SSH access
  local node
  for node in "${NODES[@]}"; do
    if ! ssh_root "${node}" -o ConnectTimeout=5 'echo "SSH OK"' &> /dev/null; then
      error "Cannot SSH to ${node}"
    fi
  done

  # Check if ceph-deploy is installed
  if ! command -v ceph-deploy &> /dev/null; then
    warn "ceph-deploy not found, will install"
  fi
}

#######################################
# Install ceph-deploy with pip3 on the machine running this script, unless
# it is already on PATH.
# NOTE(review): every ceph-deploy invocation below runs on DEPLOYMENT_NODE as
# the ceph user, so this only helps when the script itself is executed on
# that node -- confirm the intended execution host.
#######################################
install_ceph_deploy() {
  log "Installing ceph-deploy..."

  if command -v ceph-deploy &> /dev/null; then
    info "ceph-deploy already installed"
    return
  fi

  # Older pip releases reject --break-system-packages, hence the plain retry.
  pip3 install ceph-deploy --break-system-packages || pip3 install ceph-deploy
}

#######################################
# Prepare every node: upgrade packages, set the hostname, populate
# /etc/hosts, start time sync, add the Ceph repository, install the Ceph
# packages and create a passwordless-sudo "ceph" user.
# Globals: NODES, NODE_HOSTNAMES, CEPH_VERSION (read)
#######################################
prepare_nodes() {
  log "Preparing nodes..."

  local hosts_commands
  hosts_commands="$(hosts_update_commands)"

  local i node node_hostname
  for i in "${!NODES[@]}"; do
    node="${NODES[$i]}"
    node_hostname="${NODE_HOSTNAMES[$i]}"

    log "Preparing ${node_hostname} (${node})..."

    # The here-document delimiter is unquoted on purpose: the variables are
    # expanded locally before the script is sent to the node.
    ssh_root "${node}" << EOF
set -e

# Keep apt from prompting: a prompt would read from stdin and swallow the
# remainder of this script.
export DEBIAN_FRONTEND=noninteractive

# Update system
apt update && apt upgrade -y

# Install prerequisites
apt install -y chrony python3-pip || true

# Configure hostname
hostnamectl set-hostname ${node_hostname}

# Update /etc/hosts
${hosts_commands}

# Sync time (best effort)
systemctl enable chronyd || true
systemctl start chronyd || true
chronyd -q 'server time.nist.gov iburst' || true

# Add Ceph repository (using new method without apt-key).
# The keyring directory must exist before the key is downloaded into it.
mkdir -p /etc/apt/keyrings
wget -q -O /etc/apt/keyrings/ceph-release.asc 'https://download.ceph.com/keys/release.asc'
echo "deb [signed-by=/etc/apt/keyrings/ceph-release.asc] https://download.ceph.com/debian-${CEPH_VERSION}/ bullseye main" > /etc/apt/sources.list.d/ceph.list

# Update (ignore enterprise repo errors)
apt update || apt update --allow-releaseinfo-change || true

# Install Ceph
apt install -y ceph ceph-common ceph-mds || {
    # If installation fails, try with no-subscription repo
    echo "deb http://download.proxmox.com/debian/ceph-${CEPH_VERSION} bullseye no-subscription" > /etc/apt/sources.list.d/ceph-no-sub.list
    apt update
    apt install -y ceph ceph-common ceph-mds
}

# Create ceph user
if ! id ceph &>/dev/null; then
    useradd -d /home/ceph -m -s /bin/bash ceph
    echo "ceph ALL = (root) NOPASSWD:ALL" | tee /etc/sudoers.d/ceph
    chmod 0440 /etc/sudoers.d/ceph
fi
EOF
  done
}

#######################################
# Create an SSH key pair for the ceph user on the deployment node (if none
# exists) and authorize its public key for the ceph user on the other nodes.
# Globals: DEPLOYMENT_NODE, NODES (read)
#######################################
setup_ssh_keys() {
  log "Setting up SSH keys for ceph user..."

  # Generate key on deployment node
  ssh_root "${DEPLOYMENT_NODE}" << 'EOF'
set -e
su - ceph << 'CEPH_USER'
set -e
if [ ! -f ~/.ssh/id_rsa ]; then
    ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    chmod 600 ~/.ssh/authorized_keys
fi
CEPH_USER
EOF

  # Copy key to other nodes
  local pub_key
  pub_key=$(ssh_root "${DEPLOYMENT_NODE}" 'cat /home/ceph/.ssh/id_rsa.pub')
  [[ -n "${pub_key}" ]] || error "Empty ceph public key on ${DEPLOYMENT_NODE}"

  local node
  for node in "${NODES[@]}"; do
    if [[ "${node}" != "${DEPLOYMENT_NODE}" ]]; then
      ssh_root "${node}" << EOF
set -e
mkdir -p /home/ceph/.ssh
touch /home/ceph/.ssh/authorized_keys
# Append only when missing so repeated runs do not pile up duplicates.
if ! grep -qxF -- "${pub_key}" /home/ceph/.ssh/authorized_keys; then
    echo "${pub_key}" >> /home/ceph/.ssh/authorized_keys
fi
chown -R ceph:ceph /home/ceph/.ssh
chmod 700 /home/ceph/.ssh
chmod 600 /home/ceph/.ssh/authorized_keys
EOF
    fi
  done
}

#######################################
# Bootstrap the cluster with ceph-deploy from ~ceph/ceph-cluster on the
# deployment node: new, install, mon create-initial, admin.
# Globals: DEPLOYMENT_NODE, NODE_HOSTNAMES (read)
#######################################
initialize_cluster() {
  log "Initializing Ceph cluster..."

  local all_hosts="${NODE_HOSTNAMES[*]}"

  # The inner shell started by su does not inherit the outer "set -e", so it
  # is enabled again there; otherwise a failed step would be skipped over.
  ssh_root "${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
set -e
cd ~
mkdir -p ceph-cluster
cd ceph-cluster

# Create cluster configuration
ceph-deploy new ${all_hosts}

# Add configuration for 2-node setup
cat >> ceph.conf << 'CEPH_CONF'
[global]
osd pool default size = 2
osd pool default min size = 1
osd pool default pg num = 128
osd pool default pgp num = 128
public network = 192.168.11.0/24
cluster network = 192.168.11.0/24
CEPH_CONF

# Install Ceph on all nodes
ceph-deploy install ${all_hosts}

# Create initial monitors
ceph-deploy mon create-initial

# Deploy admin key
ceph-deploy admin ${all_hosts}

# Set permissions
sudo chmod +r /etc/ceph/ceph.client.admin.keyring
CEPH_USER
EOF
}

#######################################
# List the disks of each node, then zap OSD_DISK and create an OSD on it via
# ceph-deploy run from the deployment node. Destroys the data on OSD_DISK.
# Globals: DEPLOYMENT_NODE, NODES, NODE_HOSTNAMES, OSD_DISK (read)
#######################################
add_osds() {
  log "Adding OSDs..."

  info "Using ${OSD_DISK} on both nodes (unused disk)"

  local i node_ip node_hostname
  for i in "${!NODES[@]}"; do
    node_ip="${NODES[$i]}"
    node_hostname="${NODE_HOSTNAMES[$i]}"

    log "Listing disks on ${node_hostname} (${node_ip})..."
    ssh_root "${node_ip}" 'lsblk -d -o NAME,SIZE,TYPE | grep -E "NAME|disk"'

    log "Creating OSD on ${node_hostname} using ${OSD_DISK}..."

    # "set -e" inside the su shell ensures the OSD is not created when the
    # zap step fails.
    ssh_root "${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
set -e
cd ~/ceph-cluster

# Zap disk
ceph-deploy disk zap ${node_hostname} ${OSD_DISK}

# Create OSD
ceph-deploy osd create --data ${OSD_DISK} ${node_hostname}
CEPH_USER
EOF
  done
}

#######################################
# Create a Ceph manager daemon on every node through ceph-deploy.
# Globals: DEPLOYMENT_NODE, NODE_HOSTNAMES (read)
#######################################
deploy_manager() {
  log "Deploying Ceph Manager..."

  local all_hosts="${NODE_HOSTNAMES[*]}"

  ssh_root "${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
set -e
cd ~/ceph-cluster

# Deploy manager
ceph-deploy mgr create ${all_hosts}
CEPH_USER
EOF
}

#######################################
# Print cluster status, the OSD tree and health as seen from the deployment
# node.
# Globals: DEPLOYMENT_NODE (read)
#######################################
verify_cluster() {
  log "Verifying Ceph cluster..."

  ssh_root "${DEPLOYMENT_NODE}" << 'EOF'
set -e
su - ceph << 'CEPH_USER'
set -e
cd ~/ceph-cluster
echo "=== Cluster Status ==="
ceph -s
echo ""
echo "=== OSD Tree ==="
ceph osd tree
echo ""
echo "=== Health ==="
ceph health
CEPH_USER
EOF
}

#######################################
# Create and initialize the "rbd" pool (128 PGs / 128 PGPs).
# Globals: DEPLOYMENT_NODE (read)
#######################################
create_rbd_pool() {
  log "Creating RBD pool for Proxmox..."

  ssh_root "${DEPLOYMENT_NODE}" << 'EOF'
set -e
su - ceph << 'CEPH_USER'
set -e
cd ~/ceph-cluster

# Create RBD pool
ceph osd pool create rbd 128 128

# Initialize pool
rbd pool init rbd

echo "RBD pool created and initialized"
CEPH_USER
EOF
}

#######################################
# Run the installation steps in order; any failing step aborts the script.
#######################################
main() {
  log "Starting Ceph installation..."

  check_requirements
  install_ceph_deploy
  prepare_nodes
  setup_ssh_keys
  initialize_cluster
  add_osds
  deploy_manager
  verify_cluster
  create_rbd_pool

  log "Ceph installation complete!"
  info "Next steps:"
  info "  1. Configure Proxmox storage pools"
  info "  2. Enable Ceph dashboard"
  info "  3. Set up monitoring"
}

# Only run when executed directly, not when sourced.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi