Files
Sankofa/scripts/install-ceph.sh

310 lines
7.3 KiB
Bash
Raw Normal View History

#!/bin/bash
# install-ceph.sh
# Installs and configures Ceph on Proxmox nodes
#
# Optional environment (may also be provided by ../.env next to this script):
#   CEPH_VERSION         Ceph release name used in repository URLs (default: quincy)
#   DEPLOYMENT_NODE      address of the node that drives ceph-deploy
#   DEPLOYMENT_HOSTNAME  hostname of that node (not referenced in the visible
#                        part of this script)
#   SSH_KEY              private key used for root SSH access to the nodes
set -euo pipefail

# Load environment variables
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [ -f "${SCRIPT_DIR}/../.env" ]; then
  # shellcheck source=/dev/null
  source "${SCRIPT_DIR}/../.env"
fi

# Colors (interpreted later by the logging helpers)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Configuration
CEPH_VERSION="${CEPH_VERSION:-quincy}"
DEPLOYMENT_NODE="${DEPLOYMENT_NODE:-192.168.11.10}"
DEPLOYMENT_HOSTNAME="${DEPLOYMENT_HOSTNAME:-ml110-01}"
# NODES and NODE_HOSTNAMES are parallel arrays: index i of one describes the
# same machine as index i of the other.
NODES=("192.168.11.10" "192.168.11.11")
NODE_HOSTNAMES=("ml110-01" "r630-01")
# A tilde inside double quotes is NOT expanded by bash, so the previous default
# ("~/.ssh/...") produced a literal "~". Use ${HOME} to get a real path.
SSH_KEY="${SSH_KEY:-${HOME}/.ssh/sankofa_proxmox}"
# Print a timestamped progress message on stdout.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
log() {
  local message="$1"
  printf '%b\n' "${GREEN}[$(date '+%Y-%m-%d %H:%M:%S')]${NC} ${message}"
}
# Print an error message on stderr and terminate the script with status 1.
# Globals:   RED, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
error() {
  local message="$1"
  printf '%b\n' "${RED}[ERROR]${NC} ${message}" >&2
  exit 1
}
# Print a warning message on stdout.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
warn() {
  local message="$1"
  printf '%b\n' "${YELLOW}[WARN]${NC} ${message}"
}
# Print an informational message on stdout.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text; backslash escapes in it are interpreted
info() {
  local message="$1"
  printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
#######################################
# Pre-flight check: confirm root SSH access to every node and report whether
# ceph-deploy is available on the machine running this script.
# Globals:   NODES, SSH_KEY (read)
# Outputs:   progress and warning messages through log/warn
# Returns:   0 when every node answered; otherwise error() ends the script
#######################################
check_requirements() {
  # Keep the loop variable out of the global namespace.
  local node

  log "Checking requirements..."

  # Check SSH access
  # NOTE(review): StrictHostKeyChecking=no skips host-key verification; this
  # matches the rest of the script but is worth revisiting.
  for node in "${NODES[@]}"; do
    if ! ssh -i "${SSH_KEY}" -o ConnectTimeout=5 -o StrictHostKeyChecking=no \
      root@"${node}" 'echo "SSH OK"' &> /dev/null; then
      error "Cannot SSH to ${node}"
    fi
  done

  # Check if ceph-deploy is installed (a miss is only a warning here)
  if ! command -v ceph-deploy &> /dev/null; then
    warn "ceph-deploy not found, will install"
  fi
}
#######################################
# Make sure ceph-deploy is available on the machine running this script.
# If the command is already resolvable nothing is installed; otherwise pip3 is
# tried with --break-system-packages first and, if that fails, without it.
# Outputs:   progress messages through log/info
# Returns:   status of the last pip3 attempt, or 0 when already installed
#######################################
install_ceph_deploy() {
  log "Installing ceph-deploy..."

  if command -v ceph-deploy > /dev/null 2>&1; then
    info "ceph-deploy already installed"
  else
    pip3 install ceph-deploy --break-system-packages \
      || pip3 install ceph-deploy
  fi
}
#######################################
# Prepare every node for Ceph by running a setup script on it as root over SSH:
# system update, prerequisites, hostname and /etc/hosts entries, time sync,
# Ceph apt repository, Ceph packages and a passwordless-sudo "ceph" user.
# Globals:   NODES, NODE_HOSTNAMES, SSH_KEY, CEPH_VERSION (read)
# Outputs:   progress messages through log; remote command output
# Returns:   non-zero if the remote script fails on any node
#######################################
prepare_nodes() {
  # Keep loop state out of the global namespace.
  local i node hostname

  log "Preparing nodes..."

  for i in "${!NODES[@]}"; do
    node="${NODES[$i]}"
    hostname="${NODE_HOSTNAMES[$i]}"
    log "Preparing ${hostname} (${node})..."

    # The heredoc delimiter is unquoted on purpose: ${hostname} and
    # ${CEPH_VERSION} are expanded locally before the script is sent.
    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${node}" << EOF
set -e
# Update system
apt update && apt upgrade -y
# Install prerequisites
apt install -y chrony python3-pip || true
# Configure hostname
hostnamectl set-hostname ${hostname}
# Update /etc/hosts
if ! grep -q "192.168.11.10 ml110-01" /etc/hosts; then
echo "192.168.11.10 ml110-01 ml110-01.sankofa.nexus" >> /etc/hosts
fi
if ! grep -q "192.168.11.11 r630-01" /etc/hosts; then
echo "192.168.11.11 r630-01 r630-01.sankofa.nexus" >> /etc/hosts
fi
# Sync time
systemctl enable chronyd || true
systemctl start chronyd || true
chronyd -q 'server time.nist.gov iburst' || true
# Add Ceph repository (using new method without apt-key)
# Create the keyring directory first so the key download has a place to land
mkdir -p /etc/apt/keyrings
wget -q -O /etc/apt/keyrings/ceph-release.asc 'https://download.ceph.com/keys/release.asc'
echo "deb [signed-by=/etc/apt/keyrings/ceph-release.asc] https://download.ceph.com/debian-${CEPH_VERSION}/ bullseye main" > /etc/apt/sources.list.d/ceph.list
# Update (ignore enterprise repo errors)
apt update || apt update --allow-releaseinfo-change || true
# Install Ceph
apt install -y ceph ceph-common ceph-mds || {
# If installation fails, try with no-subscription repo (same release as above)
echo "deb http://download.proxmox.com/debian/ceph-${CEPH_VERSION} bullseye no-subscription" > /etc/apt/sources.list.d/ceph-no-sub.list
apt update
apt install -y ceph ceph-common ceph-mds
}
# Create ceph user
if ! id ceph &>/dev/null; then
useradd -d /home/ceph -m -s /bin/bash ceph
echo "ceph ALL = (root) NOPASSWD:ALL" | tee /etc/sudoers.d/ceph
chmod 0440 /etc/sudoers.d/ceph
fi
EOF
  done
}
#######################################
# Give the "ceph" user on the deployment node key-based SSH access to the
# "ceph" user on every other node.
# An RSA key pair is generated on the deployment node if none exists, then its
# public half is added to /home/ceph/.ssh/authorized_keys on the other nodes.
# Globals:   SSH_KEY, DEPLOYMENT_NODE, NODES (read)
# Outputs:   progress messages through log; remote command output
# Returns:   non-zero on SSH failure; error() ends the script if no public
#            key could be read
#######################################
setup_ssh_keys() {
  # Keep working variables out of the global namespace.
  local pub_key node

  log "Setting up SSH keys for ceph user..."

  # Generate key on deployment node (skipped there when one already exists)
  ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
if [ ! -f ~/.ssh/id_rsa ]; then
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
fi
CEPH_USER
EOF

  # Copy key to other nodes
  pub_key="$(ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" 'cat /home/ceph/.ssh/id_rsa.pub')"
  if [[ -z "${pub_key}" ]]; then
    error "Could not read ceph public key from ${DEPLOYMENT_NODE}"
  fi

  for node in "${NODES[@]}"; do
    if [ "${node}" != "${DEPLOYMENT_NODE}" ]; then
      # ${pub_key} is expanded locally into the script sent to the node.
      ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${node}" << EOF
set -e
mkdir -p /home/ceph/.ssh
touch /home/ceph/.ssh/authorized_keys
# Append the key only when it is not already present, so reruns stay clean
if ! grep -qxF -- "${pub_key}" /home/ceph/.ssh/authorized_keys; then
echo "${pub_key}" >> /home/ceph/.ssh/authorized_keys
fi
chown -R ceph:ceph /home/ceph/.ssh
chmod 700 /home/ceph/.ssh
chmod 600 /home/ceph/.ssh/authorized_keys
EOF
    fi
  done
}
#######################################
# Bootstrap the Ceph cluster from the deployment node.
# Runs, as the "ceph" user there, ceph-deploy new/install/mon create-initial/
# admin for all configured hostnames and appends 2-node pool defaults to the
# generated ceph.conf.
# Globals:   SSH_KEY, DEPLOYMENT_NODE, NODE_HOSTNAMES (read)
# Outputs:   progress message through log; remote command output
# Returns:   exit status of the ssh invocation
#######################################
initialize_cluster() {
  log "Initializing Ceph cluster..."

  local -a remote=(ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no "root@${DEPLOYMENT_NODE}")
  # Space-separated hostname list, substituted locally into the remote script.
  local cluster_hosts="${NODE_HOSTNAMES[*]}"

  "${remote[@]}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~
mkdir -p ceph-cluster
cd ceph-cluster
# Create cluster configuration
ceph-deploy new ${cluster_hosts}
# Add configuration for 2-node setup
cat >> ceph.conf << 'CEPH_CONF'
[global]
osd pool default size = 2
osd pool default min size = 1
osd pool default pg num = 128
osd pool default pgp num = 128
public network = 192.168.11.0/24
cluster network = 192.168.11.0/24
CEPH_CONF
# Install Ceph on all nodes
ceph-deploy install ${cluster_hosts}
# Create initial monitors
ceph-deploy mon create-initial
# Deploy admin key
ceph-deploy admin ${cluster_hosts}
# Set permissions
sudo chmod +r /etc/ceph/ceph.client.admin.keyring
CEPH_USER
EOF
}
#######################################
# Create one OSD per node on a single data disk.
# For each node the available disks are listed (informational only), then the
# deployment node zaps the disk and creates the OSD through ceph-deploy.
# WARNING: "disk zap" destroys whatever is on the selected disk.
# Globals:   NODES, NODE_HOSTNAMES, SSH_KEY, DEPLOYMENT_NODE (read)
#            OSD_DISK (read, optional) - block device to use on every node;
#            defaults to /dev/sdb
# Outputs:   progress messages through log/info; remote command output
# Returns:   non-zero if any SSH command fails
#######################################
add_osds() {
  # Keep loop state out of the global namespace.
  local i node_ip node_hostname
  # Previously hard-coded; now overridable while keeping the same default.
  local disk="${OSD_DISK:-/dev/sdb}"

  log "Adding OSDs..."
  info "Using ${disk} on both nodes (unused disk)"

  for i in "${!NODES[@]}"; do
    node_ip="${NODES[$i]}"
    node_hostname="${NODE_HOSTNAMES[$i]}"

    log "Listing disks on ${node_hostname} (${node_ip})..."
    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${node_ip}" 'lsblk -d -o NAME,SIZE,TYPE | grep -E "NAME|disk"'

    log "Creating OSD on ${node_hostname} using ${disk}..."
    # ${node_hostname} and ${disk} are expanded locally into the remote script.
    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster
# Zap disk
ceph-deploy disk zap ${node_hostname} ${disk}
# Create OSD
ceph-deploy osd create --data ${disk} ${node_hostname}
CEPH_USER
EOF
  done
}
#######################################
# Create Ceph manager daemons for all configured hostnames by running
# ceph-deploy as the "ceph" user on the deployment node.
# Globals:   SSH_KEY, DEPLOYMENT_NODE, NODE_HOSTNAMES (read)
# Outputs:   progress message through log; remote command output
# Returns:   exit status of the ssh invocation
#######################################
deploy_manager() {
  log "Deploying Ceph Manager..."

  local -a remote=(ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no "root@${DEPLOYMENT_NODE}")
  # Space-separated hostname list, substituted locally into the remote script.
  local mgr_hosts="${NODE_HOSTNAMES[*]}"

  "${remote[@]}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster
# Deploy manager
ceph-deploy mgr create ${mgr_hosts}
CEPH_USER
EOF
}
#######################################
# Print cluster status, the OSD tree and overall health by running the ceph
# CLI as the "ceph" user on the deployment node.
# Globals:   SSH_KEY, DEPLOYMENT_NODE (read)
# Outputs:   progress message through log; remote command output
# Returns:   exit status of the ssh invocation
#######################################
verify_cluster() {
  log "Verifying Ceph cluster..."

  local -a remote=(ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no "root@${DEPLOYMENT_NODE}")

  # The script contains nothing to expand locally, so the delimiter is quoted.
  "${remote[@]}" << 'REMOTE_SCRIPT'
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster
echo "=== Cluster Status ==="
ceph -s
echo ""
echo "=== OSD Tree ==="
ceph osd tree
echo ""
echo "=== Health ==="
ceph health
CEPH_USER
REMOTE_SCRIPT
}
#######################################
# Create and initialise the "rbd" pool (128 PGs / 128 PGPs) by running the
# ceph and rbd CLIs as the "ceph" user on the deployment node.
# Globals:   SSH_KEY, DEPLOYMENT_NODE (read)
# Outputs:   progress message through log; remote command output
# Returns:   exit status of the ssh invocation
#######################################
create_rbd_pool() {
  log "Creating RBD pool for Proxmox..."

  local -a remote=(ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no "root@${DEPLOYMENT_NODE}")

  # The script contains nothing to expand locally, so the delimiter is quoted.
  "${remote[@]}" << 'REMOTE_SCRIPT'
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster
# Create RBD pool
ceph osd pool create rbd 128 128
# Initialize pool
rbd pool init rbd
echo "RBD pool created and initialized"
CEPH_USER
REMOTE_SCRIPT
}
#######################################
# Entry point: run every installation stage in order, then print follow-up
# hints. Any arguments are accepted but not used.
# Outputs:   progress messages through log/info plus each stage's output
#######################################
main() {
  local stage hint
  # Stages run strictly in this order.
  local -a stages=(
    check_requirements
    install_ceph_deploy
    prepare_nodes
    setup_ssh_keys
    initialize_cluster
    add_osds
    deploy_manager
    verify_cluster
    create_rbd_pool
  )
  local -a hints=(
    "Next steps:"
    " 1. Configure Proxmox storage pools"
    " 2. Enable Ceph dashboard"
    " 3. Set up monitoring"
  )

  log "Starting Ceph installation..."
  for stage in "${stages[@]}"; do
    "${stage}"
  done
  log "Ceph installation complete!"

  for hint in "${hints[@]}"; do
    info "${hint}"
  done
}
# Run main only when this file is executed directly; when it is sourced,
# BASH_SOURCE[0] differs from $0 and only the definitions are loaded.
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi