Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Co-authored-by: Cursor <cursoragent@cursor.com>
76 lines
3.1 KiB
Bash
Executable File
76 lines
3.1 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Make RPC VMIDs (2101, 2500-2505) writable by running e2fsck on their rootfs (fixes read-only remount after ext4 errors).
|
|
# SSHs to the Proxmox host (r630-01), stops each CT, runs e2fsck -f -y on the LV, starts the CT.
|
|
#
|
|
# Usage: ./scripts/maintenance/make-rpc-vmids-writable-via-ssh.sh [--dry-run]
|
|
# Run from project root. Requires: SSH to r630-01 (root, key-based).
|
|
# See: docs/00-meta/502_DEEP_DIVE_ROOT_CAUSES_AND_FIXES.md §Read-only CT
|
|
|
|
# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Locate the project root: this script is expected two directories below it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional site configuration. Loading is deliberately best effort: a missing
# or failing config file must not stop the script, the defaults below apply.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  # shellcheck source=/dev/null
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Proxmox host to SSH into; PROXMOX_HOST_R630_01 (environment or config file) overrides the default.
HOST="${PROXMOX_HOST_R630_01:-192.168.11.11}"
# RPC VMIDs on r630-01: Core (2101) + Alltra/HYBX (2500-2505)
RPC_VMIDS=(2101 2500 2501 2502 2503 2504 2505)
# Kept as a plain string because the rest of the script expands it unquoted
# and relies on word splitting to turn it into separate ssh arguments.
SSH_OPTS="-o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

DRY_RUN=false

#######################################
# Parse command-line arguments.
# Globals:   DRY_RUN (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on stdout for -h/--help, error text on stderr
# Returns:   exits 0 after --help, exits 2 on any unknown argument.
# Unknown arguments are rejected on purpose: a mistyped "--dry-run" must not
# fall through to a real stop/e2fsck/start run.
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        printf 'Usage: %s [--dry-run]\n' "$0"
        exit 0
        ;;
      *)
        printf 'Unknown argument: %s\nUsage: %s [--dry-run]\n' "$arg" "$0" >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
|
# Print an informational message to stdout, prefixed with a blue "[INFO]" tag.
# Arguments: $1 - message text (optional; treated as empty when omitted so
#                 that `set -u` cannot abort the script inside the logger)
# The message goes through printf '%s', so backslashes in it are printed
# literally instead of being interpreted as escape sequences.
log_info() { printf '\033[0;34m[INFO]\033[0m %s\n' "${1:-}"; }
|
|
# Print a success message to stdout, prefixed with a green "[✓]" tag.
# Arguments: $1 - message text (optional; treated as empty when omitted so
#                 that `set -u` cannot abort the script inside the logger)
# The message goes through printf '%s', so backslashes in it are printed
# literally instead of being interpreted as escape sequences.
log_ok() { printf '\033[0;32m[✓]\033[0m %s\n' "${1:-}"; }
|
|
# Print a warning message to stdout, prefixed with a yellow "[⚠]" tag.
# Arguments: $1 - message text (optional; treated as empty when omitted so
#                 that `set -u` cannot abort the script inside the logger)
# The message goes through printf '%s', so backslashes in it are printed
# literally instead of being interpreted as escape sequences.
log_warn() { printf '\033[0;33m[⚠]\033[0m %s\n' "${1:-}"; }
|
|
|
|
# Banner: show the target host, the VMIDs to process and the dry-run flag.
printf '\n'
printf '%s\n' "=== Make RPC VMIDs writable via Proxmox SSH ==="
printf '%s\n' " Host: $HOST VMIDs: ${RPC_VMIDS[*]} dry-run=$DRY_RUN"
printf '\n'

# Connectivity probe: run a trivial remote command as root and stop with
# exit status 1 when it fails. SSH_OPTS is expanded unquoted on purpose so
# that it splits into individual ssh options.
# shellcheck disable=SC2086
ssh $SSH_OPTS "root@$HOST" "echo OK" 2>/dev/null || {
  printf '%s\n' "Cannot SSH to $HOST. Run from LAN with key-based auth to root@$HOST."
  exit 1
}
log_ok "SSH to $HOST OK"

# Dry run: describe the per-VMID command sequence and leave without changes.
if $DRY_RUN; then
  printf '%s\n' "Would run on $HOST for each VMID: pct stop <vmid>; e2fsck -f -y /dev/pve/vm-<vmid>-disk-0; pct start <vmid>"
  exit 0
fi
|
|
|
|
#######################################
# Decide whether an e2fsck exit status means the filesystem ended up clean.
# The status is a bit mask: 0 = no errors, 1 = errors corrected,
# 2 = errors corrected (reboot advised). 4 and above mean uncorrected errors,
# operational/usage errors or cancellation; ssh itself exits 255 on failure.
# Arguments: $1 - exit status (a missing or non-numeric value counts as failure)
# Returns:   0 when the status is below 4, non-zero otherwise
#######################################
fsck_status_ok() {
  local rc=${1:-255}
  [[ "$rc" =~ ^[0-9]+$ ]] && ((rc < 4))
}

#######################################
# Poll `pct status` on the Proxmox host until the container reports "stopped".
# Globals:   SSH_OPTS, HOST (read)
# Arguments: $1 - VMID
#            $2 - maximum number of polls, 2 seconds apart (default 15)
# Returns:   0 once the container is stopped, 1 when the polls are used up
#######################################
wait_ct_stopped() {
  local vmid=$1 attempts=${2:-15} state
  while ((attempts > 0)); do
    # SSH_OPTS is expanded unquoted on purpose so it splits into ssh options.
    # shellcheck disable=SC2086
    state=$(ssh $SSH_OPTS "root@$HOST" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || true)
    if [[ "$state" == "stopped" ]]; then
      return 0
    fi
    attempts=$((attempts - 1))
    if ((attempts > 0)); then
      sleep 2
    fi
  done
  return 1
}

# For every RPC container: stop it, repair its root filesystem with e2fsck,
# start it again and probe whether it accepts writes.
for vmid in "${RPC_VMIDS[@]}"; do
  log_info "VMID $vmid: stop, e2fsck, start..."

  # Second field of `pct status` output; empty or "missing" means the host
  # does not know this VMID (or the query failed).
  # shellcheck disable=SC2086
  status=$(ssh $SSH_OPTS "root@$HOST" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
  if [[ "$status" == "missing" || -z "$status" ]]; then
    log_warn "VMID $vmid not found on $HOST; skip"
    continue
  fi

  # A failing `pct stop` is tolerated remotely; what matters is the state
  # verified below.
  # shellcheck disable=SC2086
  ssh $SSH_OPTS "root@$HOST" "pct stop $vmid 2>/dev/null || true"
  sleep 2

  # Never run e2fsck -y against a filesystem that may still be mounted by a
  # running container: that can damage it further instead of repairing it.
  if ! wait_ct_stopped "$vmid"; then
    log_warn "VMID $vmid is not stopped; skipping e2fsck (checking a mounted filesystem can corrupt it)"
    continue
  fi

  # Capture the status instead of letting set -e abort: e2fsck legitimately
  # returns 1 or 2 when it corrected errors.
  fsck_rc=0
  # shellcheck disable=SC2086
  out=$(ssh $SSH_OPTS "root@$HOST" "lvchange -ay /dev/pve/vm-${vmid}-disk-0 2>/dev/null; e2fsck -f -y /dev/pve/vm-${vmid}-disk-0 2>&1") || fsck_rc=$?
  printf '%s\n' "$out" | tail -n 3

  # Judge the run by its exit status; matching on the output text would also
  # accept error lines, which start with "e2fsck" as well.
  if fsck_status_ok "$fsck_rc"; then
    log_ok "e2fsck done for $vmid"
  else
    log_warn "e2fsck may have failed for $vmid (exit status $fsck_rc; LV name may differ)"
  fi

  # Start CT (LV stays active; do not run lvchange -an before pct start or the
  # LV may be inactive when the container tries to mount rootfs)
  # shellcheck disable=SC2086
  ssh $SSH_OPTS "root@$HOST" "pct start $vmid 2>/dev/null" || log_warn "pct start $vmid failed"
  sleep 2

  # Quick writability check: create and remove a marker file inside the CT.
  # NOTE(review): if /tmp is a tmpfs inside a container this would not
  # reflect the state of the root filesystem - confirm for these CTs.
  # shellcheck disable=SC2086
  if ssh $SSH_OPTS "root@$HOST" "pct exec $vmid -- touch /tmp/.w 2>/dev/null && pct exec $vmid -- rm -f /tmp/.w 2>/dev/null"; then
    log_ok "VMID $vmid writable"
  else
    log_warn "VMID $vmid /tmp still not writable (may need retry or different fix)"
  fi
done
|
|
|
|
# Closing summary, framed by blank lines.
printf '\n'
log_ok "Done. Re-run fix/install scripts as needed."
printf '\n'
|