Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands - CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround - CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check - NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere - MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates - LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference Co-authored-by: Cursor <cursoragent@cursor.com>
416 lines
12 KiB
Bash
Executable File
416 lines
12 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Expand RAID 10 on the target node from 4 disks to 6 disks.
#
# WARNING: This requires stopping the RAID array and rebuilding it.
# This will cause downtime and requires data backup/restore.

# Strict mode: exit on error, error on unset variables, fail pipelines
# on any stage. (The original also ran a separate 'set -u', which was
# redundant — '-u' is already enabled here.)
set -euo pipefail
# --- Target node configuration ---
# Each value can be overridden from the environment; the literals below
# are only defaults, so existing behavior is unchanged.
TARGET_NODE="${TARGET_NODE:-r630-01}"
TARGET_NODE_IP="${TARGET_NODE_IP:-192.168.11.11}"
# SECURITY: hardcoded default password. Prefer exporting TARGET_NODE_PASS
# (or switching to SSH key-based auth) rather than relying on this default.
TARGET_NODE_PASS="${TARGET_NODE_PASS:-password}"

# ANSI color codes for log output
GREEN='\033[0;32m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no color
# Logging helpers: print a colored, tagged message.
# printf '%b\n' interprets the \033 escapes exactly like 'echo -e'.
log_info()    { printf '%b\n' "${BLUE}[INFO]${NC} $1"; }
log_success() { printf '%b\n' "${GREEN}[✓]${NC} $1"; }
log_error()   { printf '%b\n' "${RED}[✗]${NC} $1"; }
log_warn()    { printf '%b\n' "${YELLOW}[⚠]${NC} $1"; }
# Run a command on the target node as root over SSH (password auth via
# sshpass).
# Arguments: the remote command (passed straight through to ssh).
# Outputs:  remote stdout AND stderr, merged onto stdout by the trailing
#           2>&1 — note that callers capturing output with $(...) will
#           therefore also capture remote error text.
# Returns:  the ssh exit status (remote command status, or 255 on
#           connection failure).
# NOTE(review): sshpass exposes the password on the process list; SSH
# key authentication would be safer — confirm before hardening.
ssh_r630_01() {
  sshpass -p "$TARGET_NODE_PASS" ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 root@"$TARGET_NODE_IP" "$@" 2>&1
}
# Verify the node is in a state where the rebuild can proceed.
# Returns: 0 when the new disks exist and are unused, 1 otherwise.
check_prerequisites() {
  log_info "Checking prerequisites..."

  # The RAID may already be stopped by a previous (partial) run — not fatal.
  if ! ssh_r630_01 "test -b /dev/md0"; then
    log_info "RAID /dev/md0 not found (may have been stopped already, continuing...)"
  fi

  # Both disks being added must exist as block devices.
  if ! ssh_r630_01 "test -b /dev/sdc && test -b /dev/sdd"; then
    log_error "sdc and/or sdd not found"
    return 1
  fi

  # Neither new disk may be an LVM PV or mounted (same check for each;
  # the original duplicated this stanza verbatim for sdc and sdd).
  local disk
  for disk in sdc sdd; do
    if ssh_r630_01 "pvs 2>/dev/null | grep -q /dev/$disk || mount | grep -q /dev/$disk"; then
      log_error "$disk is still in use"
      return 1
    fi
  done

  log_success "Prerequisites check passed"
  return 0
}
# Back up the 'pve' volume group metadata so it can be restored after
# the RAID array is rebuilt. Best-effort: always returns 0.
backup_lvm_config() {
  log_info "Backing up LVM configuration..."

  # Only back up when the VG still exists.
  if ssh_r630_01 "vgs pve >/dev/null 2>&1"; then
    ssh_r630_01 "vgcfgbackup pve" || {
      log_warn "vgcfgbackup failed, but continuing..."
    }
    log_success "LVM configuration backed up"
  else
    log_info "VG pve not found, checking for existing backups..."
    # Declare and assign separately so a failing remote command is not
    # masked by 'local' (SC2155); treat failure as "no backup".
    local backup_file
    backup_file=$(ssh_r630_01 "ls -t /etc/lvm/backup/pve* 2>/dev/null | head -1") || backup_file=""
    if [ -n "$backup_file" ]; then
      log_info "Found existing backup: $backup_file"
    else
      log_warn "No VG and no backup found - LVM restoration may not work"
    fi
  fi

  return 0
}
# Stop every running LXC container and QEMU VM on the node so the LVM
# stack can be deactivated safely. Best-effort: individual failures are
# warned about, not fatal; always returns 0.
stop_containers_vms() {
  log_info "Stopping all containers and VMs..."

  # Declare and assign separately (SC2155), and fall back to empty on
  # failure: ssh_r630_01 merges stderr into stdout, so a failed remote
  # call would otherwise have its error text parsed as a VMID list.
  local running_containers running_vms
  running_containers=$(ssh_r630_01 "pct list | awk 'NR>1 && \$2==\"running\" {print \$1}'") || running_containers=""
  running_vms=$(ssh_r630_01 "qm list | awk 'NR>1 && \$2==\"running\" {print \$1}'") || running_vms=""

  local vmid
  # Stop containers first.
  if [ -n "$running_containers" ]; then
    log_info "Stopping containers: $running_containers"
    for vmid in $running_containers; do
      log_info "Stopping container $vmid..."
      ssh_r630_01 "pct stop $vmid" || log_warn "Failed to stop container $vmid"
    done
    sleep 5
  fi

  # Then VMs: graceful shutdown first, hard stop as fallback.
  if [ -n "$running_vms" ]; then
    log_info "Stopping VMs: $running_vms"
    for vmid in $running_vms; do
      log_info "Stopping VM $vmid..."
      ssh_r630_01 "qm shutdown $vmid" || ssh_r630_01 "qm stop $vmid" || log_warn "Failed to stop VM $vmid"
    done
    sleep 10
  fi

  # Extra settle time for guests still flushing to disk.
  log_info "Waiting for all containers/VMs to stop..."
  sleep 10

  log_success "Containers and VMs stopped"
  return 0
}
# Deactivate all logical volumes in the 'pve' VG (best effort; always
# returns 0 so the caller's error path is driven by later steps).
deactivate_lvm() {
  log_info "Deactivating LVM volumes on pve VG..."

  # Nothing to do when the VG is already gone.
  if ! ssh_r630_01 "vgs pve >/dev/null 2>&1"; then
    log_info "VG pve not found (may have been removed already)"
    return 0
  fi

  # Plain deactivate first; escalate to --force, then give up quietly.
  if ! ssh_r630_01 "vgchange -an pve"; then
    log_warn "Normal deactivate failed, trying force..."
    if ! ssh_r630_01 "vgchange -an --force pve"; then
      log_warn "VG may already be deactivated or removed"
    fi
  fi

  log_success "LVM volumes deactivated"
  return 0
}
# Detach the RAID physical volume (/dev/md0) from the 'pve' VG so the
# array can be stopped. Best effort; always returns 0.
remove_pv_from_vg() {
  log_info "Removing RAID PV from pve VG..."

  # Skip when the VG is already gone.
  if ! ssh_r630_01 "vgs pve >/dev/null 2>&1"; then
    log_info "VG pve not found, skipping PV removal"
    return 0
  fi

  # Skip when the RAID device itself no longer exists.
  if ! ssh_r630_01 "test -b /dev/md0"; then
    log_info "RAID /dev/md0 not found, PV may already be removed"
    return 0
  fi

  # vgreduce releases the device from the VG; tolerate "already removed".
  if ! ssh_r630_01 "vgreduce pve /dev/md0"; then
    log_warn "Failed to remove PV from VG, may already be removed"
  fi

  log_success "PV removed from VG"
  return 0
}
# Tear down /dev/md0: clear device-mapper entries, unmount, kill users
# of the device, then stop the array (force as fallback). Best effort;
# always returns 0.
stop_raid() {
  log_info "Stopping RAID array /dev/md0..."

  # Already stopped? Nothing to do.
  if ! ssh_r630_01 "test -b /dev/md0"; then
    log_info "RAID /dev/md0 already stopped or doesn't exist"
    return 0
  fi

  # Clear device-mapper entries that may hold the device open.
  log_info "Removing device mapper entries..."
  ssh_r630_01 "dmsetup remove_all --force 2>/dev/null" || true
  sleep 2

  # Unmount anything still mounted from the array.
  ssh_r630_01 "umount /dev/md0* 2>/dev/null" || true

  # Kill remaining processes that keep md0 busy.
  log_info "Checking for processes using md0..."
  ssh_r630_01 "fuser -km /dev/md0 2>/dev/null" || true
  sleep 2

  # Stop the array; escalate to --force, then tolerate failure.
  if ! ssh_r630_01 "mdadm --stop /dev/md0"; then
    log_warn "Normal stop failed, trying with --force..."
    if ! ssh_r630_01 "mdadm --stop --force /dev/md0"; then
      log_warn "Force stop failed, RAID may already be stopped"
    fi
  fi

  log_success "RAID array stopped"
  return 0
}
# Clear mdadm superblocks so the disks can join a new array.
# Arguments: optional list of disk names (e.g. "sdc sdd"); defaults to
#            the full 6-disk set sdc-sdh, preserving original behavior.
# Returns:   always 0 (a disk without a superblock only warns).
wipe_raid_superblocks() {
  local disks=("$@")
  if [ ${#disks[@]} -eq 0 ]; then
    disks=(sdc sdd sde sdf sdg sdh)
  fi

  log_info "Wiping RAID superblocks from disks..."

  local disk
  for disk in "${disks[@]}"; do
    log_info "Wiping superblock from /dev/$disk..."
    ssh_r630_01 "mdadm --zero-superblock /dev/$disk 2>/dev/null" || {
      log_warn "Failed to wipe superblock from $disk (may not have one)"
    }
  done

  log_success "Superblocks wiped"
  return 0
}
# Build a new 6-disk RAID 10 (/dev/md0) from sdc-sdh, wait for the
# initial sync (up to 3 hours), and persist the configuration.
# Returns: 0 on success (even if sync is still running at timeout),
#          1 if array creation fails.
create_6disk_raid10() {
  log_info "Creating RAID 10 with all 6 disks (sdc-sdh)..."

  # Drop stale md0 entries so the --detail --scan append below does not
  # leave duplicate definitions in mdadm.conf.
  ssh_r630_01 "sed -i '/md0/d' /etc/mdadm/mdadm.conf" || true

  # Old superblocks must be gone before --create will use the disks.
  wipe_raid_superblocks

  # 'echo y' auto-confirms mdadm's "device contains a filesystem" prompt.
  log_info "Creating RAID 10 array..."
  ssh_r630_01 "echo y | mdadm --create /dev/md0 --level=10 --raid-devices=6 /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg /dev/sdh --bitmap=internal" || {
    log_error "Failed to create RAID 10 with 6 disks"
    return 1
  }

  log_success "RAID 10 created with all 6 disks"

  # Poll /proc/mdstat until all six members show up ([UUUUUU]) or we
  # give up after max_wait seconds.
  log_info "Waiting for RAID array to synchronize (this may take 1-2 hours)..."
  local max_wait=10800 # 3 hours max
  local waited=0
  local status progress

  while [ $waited -lt $max_wait ]; do
    # Assigned (not declared) here so an ssh failure is not masked by
    # 'local' (SC2155); treat failure as "no status yet".
    status=$(ssh_r630_01 "cat /proc/mdstat 2>/dev/null | grep -A 2 md0 | tail -1") || status=""

    if echo "$status" | grep -q "\[UUUUUU\]"; then
      log_success "RAID array is fully synchronized"
      break
    elif echo "$status" | grep -q "recovery\|resync"; then
      progress=$(echo "$status" | grep -oP '\d+\.\d+%' || echo "in progress")
      if [ $((waited % 300)) -eq 0 ]; then # Log every 5 minutes
        log_info "RAID sync progress: $progress (elapsed: $((waited/60)) minutes)"
      fi
      sleep 30
      waited=$((waited + 30))
    else
      # No sync line yet (array settling) — poll faster.
      sleep 10
      waited=$((waited + 10))
    fi
  done

  if [ $waited -ge $max_wait ]; then
    log_warn "RAID sync may still be in progress. Check manually: cat /proc/mdstat"
  fi

  # Persist the new array so it assembles on boot.
  log_info "Saving RAID configuration..."
  ssh_r630_01 "mdadm --detail --scan >> /etc/mdadm/mdadm.conf" || {
    log_warn "Failed to save to mdadm.conf"
  }

  ssh_r630_01 "update-initramfs -u" || true

  return 0
}
# Recreate the LVM stack (PV -> VG) on the rebuilt /dev/md0 and activate
# the 'pve' volume group.
# Returns: 0 on success, 1 when the PV cannot be created or the VG
#          cannot be restored/activated.
restore_lvm() {
  log_info "Restoring LVM on new RAID..."

  # Locate the newest VG metadata backup (SC2155: decl/assign split so
  # an ssh failure is not masked; treat failure as "no backup").
  local backup_file
  backup_file=$(ssh_r630_01 "ls -t /etc/lvm/backup/pve* 2>/dev/null | head -1") || backup_file=""

  if [ -n "$backup_file" ]; then
    log_info "Found LVM backup: $backup_file"
    log_info "Restoring VG metadata..."
    ssh_r630_01 "vgcfgrestore -f $backup_file pve" || {
      log_warn "vgcfgrestore failed, trying alternative method..."
    }
  else
    log_warn "No LVM backup found, will need to recreate"
  fi

  # Create a PV on the new RAID only when one is not already present.
  # BUGFIX: the original ran 'pvcreate --uuid $(ssh ...)' where the
  # nested substitution expanded locally and could be empty, producing
  # the invalid command 'pvcreate --uuid /dev/md0'.
  log_info "Creating physical volume on new RAID..."
  if ssh_r630_01 "pvdisplay /dev/md0 >/dev/null 2>&1"; then
    log_info "PV already exists on /dev/md0, skipping pvcreate"
  else
    ssh_r630_01 "pvcreate /dev/md0" || {
      log_error "Failed to create PV"
      return 1
    }
  fi

  # Restore the VG if the earlier vgcfgrestore did not bring it back.
  if ! ssh_r630_01 "vgs pve >/dev/null 2>&1"; then
    log_info "Restoring volume group..."
    if [ -n "$backup_file" ]; then
      ssh_r630_01 "vgcfgrestore -f $backup_file pve" || {
        log_error "Failed to restore VG"
        return 1
      }
    else
      log_error "Cannot restore VG without backup"
      return 1
    fi
  fi

  # Activate the VG so LVs become usable again.
  log_info "Activating volume group..."
  ssh_r630_01 "vgchange -ay pve" || {
    log_error "Failed to activate VG"
    return 1
  }

  log_success "LVM restored and activated on new RAID"
  return 0
}
# Dump RAID and LVM state for operator verification.
show_status() {
  log_info "=== RAID Status ==="
  ssh_r630_01 "cat /proc/mdstat"
  printf '\n'
  ssh_r630_01 "mdadm --detail /dev/md0"
  printf '\n'
  log_info "=== LVM Status ==="
  ssh_r630_01 "vgs pve"
  ssh_r630_01 "pvs | grep pve"
}
# Orchestrate the full RAID 10 expansion:
# warn -> check -> backup LVM -> stop guests -> deactivate LVM ->
# remove PV -> stop RAID -> create 6-disk array -> restore LVM -> status.
# Exits non-zero on any unrecoverable step; attempts to re-activate LVM
# before aborting where that makes sense.
main() {
  echo ""
  log_warn "=== WARNING: RAID 10 Expansion to 6 Disks ==="
  log_warn ""
  log_warn "This script will:"
  log_warn "1. STOP the current RAID 10 array"
  log_warn "2. CREATE a new RAID 10 with all 6 disks"
  log_warn "3. Attempt to restore LVM configuration"
  log_warn ""
  log_warn "IMPORTANT:"
  log_warn "- This will cause DOWNTIME"
  log_warn "- All containers/VMs will be unavailable"
  log_warn "- LVM volumes may need manual restoration"
  log_warn "- Data backup is STRONGLY recommended"
  log_warn ""
  log_warn "This is a DESTRUCTIVE operation!"
  echo ""
  # NOTE(review): there is NO interactive confirmation — the script
  # proceeds after a 3-second pause. Confirm this is intended for a
  # destructive operation.
  log_warn "Auto-confirming and proceeding with expansion..."
  log_warn "This is a destructive operation - all containers/VMs will be unavailable during this process"
  sleep 3

  # Abort early if target disks are missing or still in use.
  if ! check_prerequisites; then
    exit 1
  fi

  # Best-effort VG metadata backup (needed later by restore_lvm).
  backup_lvm_config

  # Guest shutdown is best-effort; later steps force-release the device.
  if ! stop_containers_vms; then
    log_warn "Some containers/VMs may not have stopped, continuing anyway..."
  fi

  # If LVM cannot be deactivated, re-activate and bail out — the array
  # must not be stopped while LVs are live.
  if ! deactivate_lvm; then
    log_error "Failed to deactivate LVM volumes"
    log_warn "Attempting to reactivate VG..."
    ssh_r630_01 "vgchange -ay pve" || true
    exit 1
  fi

  # PV removal failure is tolerated; stop_raid force-releases the device.
  if ! remove_pv_from_vg; then
    log_warn "Failed to remove PV, continuing anyway..."
  fi

  # On failure, try to roll back: re-attach the PV and re-activate LVM.
  if ! stop_raid; then
    log_error "Failed to stop RAID array"
    log_warn "Attempting to reactivate LVM..."
    ssh_r630_01 "vgextend pve /dev/md0 2>/dev/null || true"
    ssh_r630_01 "vgchange -ay pve" || true
    exit 1
  fi

  # Give the kernel a moment to fully release /dev/md0.
  sleep 3

  # Point of no return: old array data is gone once this succeeds.
  if ! create_6disk_raid10; then
    log_error "Failed to create 6-disk RAID"
    log_warn "You may need to manually recover"
    exit 1
  fi

  # LVM restore issues are reported but non-fatal — status is shown so
  # the operator can finish recovery by hand.
  if ! restore_lvm; then
    log_error "LVM restoration had issues"
    log_warn "You may need to manually restore LVM volumes"
    log_warn "Check: vgcfgrestore -l pve"
  fi

  show_status

  log_success "RAID 10 expansion completed!"
  log_info ""
  log_info "RAID Device: /dev/md0"
  log_info "Capacity: ~700GB (RAID 10 with 6 disks)"
  log_info "Performance: Maximum (6x read, 3x write)"
  log_info "Redundancy: Can survive 1-3 disk failures"
  log_info ""
  log_warn "IMPORTANT: Verify all containers/VMs are accessible"
  log_warn "You may need to manually restore LVM volumes if restoration failed"
}
# Entry point — forward any CLI arguments (currently unused by main).
main "$@"