#!/bin/bash
# install-ceph.sh
# Installs and configures Ceph on Proxmox nodes
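#
# Usage (a sketch; the optional ../.env file, if present, may pre-set any of
# the variables in the Configuration section below, e.g. CEPH_VERSION,
# DEPLOYMENT_NODE or SSH_KEY):
#   ./install-ceph.sh
#   CEPH_VERSION=quincy ./install-ceph.sh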

set -euo pipefail

# Load environment variables
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [ -f "${SCRIPT_DIR}/../.env" ]; then
    source "${SCRIPT_DIR}/../.env"
fi

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Configuration
CEPH_VERSION="${CEPH_VERSION:-quincy}"
DEPLOYMENT_NODE="${DEPLOYMENT_NODE:-192.168.11.10}"
DEPLOYMENT_HOSTNAME="${DEPLOYMENT_HOSTNAME:-ml110-01}"
NODES=("192.168.11.10" "192.168.11.11")
NODE_HOSTNAMES=("ml110-01" "r630-01")
SSH_KEY="${SSH_KEY:-${HOME}/.ssh/sankofa_proxmox}"
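# NOTE: NODES and NODE_HOSTNAMES are positional pairs; keep the two arrays
# index-aligned, since prepare_nodes and add_osds iterate them together.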

log() {
    echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1"
}

error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
    exit 1
}

warn() {
    echo -e "${YELLOW}[WARN]${NC} $1"
}

info() {
    echo -e "${BLUE}[INFO]${NC} $1"
}

check_requirements() {
    log "Checking requirements..."

    # Check SSH access
    for node in "${NODES[@]}"; do
        if ! ssh -i "${SSH_KEY}" -o ConnectTimeout=5 -o StrictHostKeyChecking=no root@"${node}" 'echo "SSH OK"' &>/dev/null; then
            error "Cannot SSH to ${node}"
        fi
    done

    # Check if ceph-deploy is installed
    if ! command -v ceph-deploy &> /dev/null; then
        warn "ceph-deploy not found, will install"
    fi
}

install_ceph_deploy() {
    log "Installing ceph-deploy..."

    if command -v ceph-deploy &> /dev/null; then
        info "ceph-deploy already installed"
        return
    fi
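
    # --break-system-packages is needed where pip treats the system Python as
    # externally managed (PEP 668); the fallback covers older pip versions
    # that do not know the flag.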
    pip3 install ceph-deploy --break-system-packages || pip3 install ceph-deploy
}

prepare_nodes() {
    log "Preparing nodes..."

    for i in "${!NODES[@]}"; do
        node="${NODES[$i]}"
        hostname="${NODE_HOSTNAMES[$i]}"

        log "Preparing ${hostname} (${node})..."
        ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${node}" << EOF
set -e
# Update system
apt update && apt upgrade -y

# Install prerequisites
apt install -y chrony python3-pip || true

# Configure hostname
hostnamectl set-hostname ${hostname}

# Update /etc/hosts
if ! grep -q "192.168.11.10 ml110-01" /etc/hosts; then
    echo "192.168.11.10 ml110-01 ml110-01.sankofa.nexus" >> /etc/hosts
fi
if ! grep -q "192.168.11.11 r630-01" /etc/hosts; then
    echo "192.168.11.11 r630-01 r630-01.sankofa.nexus" >> /etc/hosts
fi

# Sync time
systemctl enable chrony || true
systemctl start chrony || true
chronyd -q 'server time.nist.gov iburst' || true

# Add Ceph repository (keyring file instead of the deprecated apt-key)
mkdir -p /etc/apt/keyrings
wget -q -O /etc/apt/keyrings/ceph-release.asc 'https://download.ceph.com/keys/release.asc'
echo "deb [signed-by=/etc/apt/keyrings/ceph-release.asc] https://download.ceph.com/debian-${CEPH_VERSION}/ bullseye main" > /etc/apt/sources.list.d/ceph.list

# Update (ignore enterprise repo errors)
apt update || apt update --allow-releaseinfo-change || true

# Install Ceph
apt install -y ceph ceph-common ceph-mds || {
    # If installation fails, fall back to the Proxmox no-subscription Ceph repo
    echo "deb http://download.proxmox.com/debian/ceph-${CEPH_VERSION} bullseye no-subscription" > /etc/apt/sources.list.d/ceph-no-sub.list
    apt update
    apt install -y ceph ceph-common ceph-mds
}

# Create ceph user
if ! id ceph &>/dev/null; then
    useradd -d /home/ceph -m -s /bin/bash ceph
    echo "ceph ALL = (root) NOPASSWD:ALL" | tee /etc/sudoers.d/ceph
    chmod 0440 /etc/sudoers.d/ceph
fi
EOF
    done
}

setup_ssh_keys() {
    log "Setting up SSH keys for ceph user..."

    # Generate key on deployment node
    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
if [ ! -f ~/.ssh/id_rsa ]; then
    mkdir -p ~/.ssh && chmod 700 ~/.ssh
    ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
    cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
    chmod 600 ~/.ssh/authorized_keys
fi
CEPH_USER
EOF

    # Copy key to other nodes
    PUB_KEY=$(ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" 'cat /home/ceph/.ssh/id_rsa.pub')

    for node in "${NODES[@]}"; do
        if [ "${node}" != "${DEPLOYMENT_NODE}" ]; then
            ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${node}" << EOF
set -e
mkdir -p /home/ceph/.ssh
echo "${PUB_KEY}" >> /home/ceph/.ssh/authorized_keys
chown -R ceph:ceph /home/ceph/.ssh
chmod 700 /home/ceph/.ssh
chmod 600 /home/ceph/.ssh/authorized_keys
EOF
        fi
    done
}

initialize_cluster() {
    log "Initializing Ceph cluster..."

    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~
mkdir -p ceph-cluster
cd ceph-cluster

# Create cluster configuration
ceph-deploy new ${NODE_HOSTNAMES[@]}

# Add configuration for 2-node setup
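# (size=2 keeps one replica on each node; min size=1 lets I/O continue while
# one of the two nodes is down)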
cat >> ceph.conf << 'CEPH_CONF'
[global]
osd pool default size = 2
osd pool default min size = 1
osd pool default pg num = 128
osd pool default pgp num = 128
public network = 192.168.11.0/24
cluster network = 192.168.11.0/24
CEPH_CONF

# Install Ceph on all nodes
ceph-deploy install ${NODE_HOSTNAMES[@]}

# Create initial monitors
ceph-deploy mon create-initial

# Deploy admin key
ceph-deploy admin ${NODE_HOSTNAMES[@]}

# Set permissions
sudo chmod +r /etc/ceph/ceph.client.admin.keyring
CEPH_USER
EOF
}

add_osds() {
    log "Adding OSDs..."
    info "Using /dev/sdb on both nodes (unused disk)"

    for i in "${!NODES[@]}"; do
        node_ip="${NODES[$i]}"
        node_hostname="${NODE_HOSTNAMES[$i]}"

        log "Listing disks on ${node_hostname} (${node_ip})..."
        ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${node_ip}" 'lsblk -d -o NAME,SIZE,TYPE | grep -E "NAME|disk"'
        DISK="/dev/sdb"
        log "Creating OSD on ${node_hostname} using ${DISK}..."

        ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster

# Zap disk
ceph-deploy disk zap ${node_hostname} ${DISK}

# Create OSD
ceph-deploy osd create --data ${DISK} ${node_hostname}
CEPH_USER
EOF
    done
}

deploy_manager() {
    log "Deploying Ceph Manager..."

    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster

# Deploy manager
ceph-deploy mgr create ${NODE_HOSTNAMES[@]}
CEPH_USER
EOF
}

verify_cluster() {
    log "Verifying Ceph cluster..."

    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster

echo "=== Cluster Status ==="
ceph -s

echo ""
echo "=== OSD Tree ==="
ceph osd tree

echo ""
echo "=== Health ==="
ceph health
CEPH_USER
EOF
}

create_rbd_pool() {
    log "Creating RBD pool for Proxmox..."

    ssh -i "${SSH_KEY}" -o StrictHostKeyChecking=no root@"${DEPLOYMENT_NODE}" << EOF
set -e
su - ceph << 'CEPH_USER'
cd ~/ceph-cluster

# Create RBD pool
ceph osd pool create rbd 128 128

# Initialize pool
rbd pool init rbd

echo "RBD pool created and initialized"
CEPH_USER
EOF
}

main() {
    log "Starting Ceph installation..."

    check_requirements
    install_ceph_deploy
    prepare_nodes
    setup_ssh_keys
    initialize_cluster
    add_osds
    deploy_manager
    verify_cluster
    create_rbd_pool

    log "Ceph installation complete!"
    info "Next steps:"
    info " 1. Configure Proxmox storage pools"
    info " 2. Enable Ceph dashboard"
    info " 3. Set up monitoring"
}
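
# The first two "Next steps" printed by main() can typically be done like this
# (a sketch; the storage name "ceph-rbd" is illustrative, and the dashboard
# module may require the ceph-mgr-dashboard package to be installed first):
#   pvesm add rbd ceph-rbd --pool rbd --monhost "192.168.11.10 192.168.11.11" --content images,rootdir
#   ceph mgr module enable dashboard
#   ceph dashboard create-self-signed-cert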

if [ "${BASH_SOURCE[0]}" == "${0}" ]; then
    main "$@"
fi