Files
proxmox/scripts/maintenance/make-rpc-vmids-writable-via-ssh.sh
defiQUG bea1903ac9
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Sync all local changes: docs, config, scripts, submodule refs, verification evidence
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 15:46:06 -08:00

76 lines
3.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Make RPC VMIDs (2101, 2500-2505) writable by running e2fsck on their rootfs (fixes read-only remount after ext4 errors).
# SSHs to the Proxmox host (r630-01), stops each CT, runs e2fsck -f -y on the LV, starts the CT.
#
# Usage: ./scripts/maintenance/make-rpc-vmids-writable-via-ssh.sh [--dry-run]
# Run from project root. Requires: SSH to r630-01 (root, key-based).
# See: docs/00-meta/502_DEEP_DIVE_ROOT_CAUSES_AND_FIXES.md §Read-only CT
# Strict mode: abort on unhandled errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Locate the project root relative to this script (two directories above it).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional site config (may define PROXMOX_HOST_R630_01). Loading is best-effort:
# a missing or failing config file must not abort the script.
IP_CONF="${PROJECT_ROOT}/config/ip-addresses.conf"
if [[ -f "$IP_CONF" ]]; then
  # shellcheck disable=SC1090 — path is computed at runtime
  source "$IP_CONF" 2>/dev/null || true
fi

# Target Proxmox host; falls back to a fixed address when the config does not set it.
HOST="${PROXMOX_HOST_R630_01:-192.168.11.11}"
# RPC VMIDs on r630-01: Core (2101) + Alltra/HYBX (2500-2505)
RPC_VMIDS=(2101 2500 2501 2502 2503 2504 2505)
# Kept as a plain string because later code expands it unquoted (word-split) on the ssh command line.
SSH_OPTS="-o ConnectTimeout=15 -o StrictHostKeyChecking=accept-new"

DRY_RUN=false

#######################################
# Parse command-line arguments.
# Unknown arguments are rejected so that a mistyped --dry-run can never
# fall through to a real (destructive) run.
# Globals:   DRY_RUN (written)
# Arguments: the script's positional parameters
# Outputs:   usage text on stdout (--help) or stderr (unknown argument)
# Returns:   0 on success; exits 0 on --help, exits 2 on an unknown argument
#######################################
parse_args() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        printf 'Usage: %s [--dry-run]\n' "${0##*/}"
        exit 0
        ;;
      *)
        printf 'Unknown argument: %s\nUsage: %s [--dry-run]\n' "$arg" "${0##*/}" >&2
        exit 2
        ;;
    esac
  done
}
parse_args "$@"
# Print an informational message ($1) to stdout with a blue [INFO] tag.
# The message is emitted literally: backslash sequences in it are not interpreted.
log_info() { printf '\033[0;34m[INFO]\033[0m %s\n' "$1"; }
# Print a success message ($1) to stdout with a green check-mark tag.
# The message is emitted literally: backslash sequences in it are not interpreted.
log_ok() { printf '\033[0;32m[✓]\033[0m %s\n' "$1"; }
# Print a warning message ($1) to stdout with a yellow warning-sign tag.
# The message is emitted literally: backslash sequences in it are not interpreted.
log_warn() { printf '\033[0;33m[⚠]\033[0m %s\n' "$1"; }
# Banner: show the target host, the VMID list and whether this is a dry run.
printf '\n'
printf '%s\n' "=== Make RPC VMIDs writable via Proxmox SSH ==="
printf '%s\n' " Host: $HOST VMIDs: ${RPC_VMIDS[*]} dry-run=$DRY_RUN"
printf '\n'

# Connectivity probe: bail out early when the host cannot be reached over SSH.
# SSH_OPTS is intentionally unquoted so it splits into separate ssh options.
# shellcheck disable=SC2086
ssh $SSH_OPTS "root@$HOST" "echo OK" 2>/dev/null || {
  printf '%s\n' "Cannot SSH to $HOST. Run from LAN with key-based auth to root@$HOST."
  exit 1
}
log_ok "SSH to $HOST OK"

# Dry run: describe the per-VMID actions and stop before touching any container.
if [[ "$DRY_RUN" == true ]]; then
  printf '%s\n' "Would run on $HOST for each VMID: pct stop <vmid>; e2fsck -f -y /dev/pve/vm-<vmid>-disk-0; pct start <vmid>"
  exit 0
fi
#######################################
# Decide whether an e2fsck exit status means the filesystem ended up clean.
# Per e2fsck(8): 0 = no errors, 1 = errors corrected, 2 = errors corrected
# (reboot advised); 3 is the two combined. Anything else (4 = errors left
# uncorrected, 8 = operational error, 16/32/128, or ssh's own 255) is a failure.
# Arguments: $1 - exit status to classify
# Returns:   0 when the status indicates a clean/repaired filesystem, 1 otherwise
#######################################
e2fsck_rc_is_clean() {
  case "$1" in
    0 | 1 | 2 | 3) return 0 ;;
    *) return 1 ;;
  esac
}

# For each RPC container: stop it, fsck its root LV, start it again, then probe writability.
# SSH_OPTS is intentionally unquoted throughout so it splits into separate ssh options.
# shellcheck disable=SC2086
for vmid in "${RPC_VMIDS[@]}"; do
  log_info "VMID $vmid: stop, e2fsck, start..."
  lv="/dev/pve/vm-${vmid}-disk-0"

  # The remote pipeline ends in awk, so an unknown VMID yields empty output rather
  # than a failure; "missing" is only substituted when the ssh command itself fails.
  status=$(ssh $SSH_OPTS "root@$HOST" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
  if [[ "$status" == "missing" || -z "$status" ]]; then
    log_warn "VMID $vmid not found on $HOST; skip"
    continue
  fi

  ssh $SSH_OPTS "root@$HOST" "pct stop $vmid 2>/dev/null || true"
  sleep 2

  # Errors from 'pct stop' are ignored above, so confirm the CT is really stopped
  # before running a repairing fsck against its root filesystem.
  status=$(ssh $SSH_OPTS "root@$HOST" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || true)
  if [[ "$status" != "stopped" ]]; then
    log_warn "VMID $vmid did not stop (status: ${status:-unknown}); skipping e2fsck"
    continue
  fi

  # e2fsck exits non-zero even when it successfully repaired errors, so capture the
  # status instead of letting set -e abort; it is classified below.
  rc=0
  out=$(ssh $SSH_OPTS "root@$HOST" "lvchange -ay $lv 2>/dev/null; e2fsck -f -y $lv 2>&1") || rc=$?
  printf '%s\n' "$out" | tail -n 3

  # Judge by exit status, not by output text: every e2fsck message (including
  # errors such as a missing device) contains the word "e2fsck".
  if e2fsck_rc_is_clean "$rc"; then
    log_ok "e2fsck done for $vmid"
  else
    log_warn "e2fsck may have failed for $vmid (exit status $rc; LV name may differ)"
  fi

  # Start CT (LV stays active; do not run lvchange -an before pct start or the LV
  # may be inactive when the container tries to mount rootfs)
  ssh $SSH_OPTS "root@$HOST" "pct start $vmid 2>/dev/null" || log_warn "pct start $vmid failed"
  sleep 2

  # Quick writability check: create and remove a scratch file inside the CT.
  if ssh $SSH_OPTS "root@$HOST" "pct exec $vmid -- touch /tmp/.w 2>/dev/null && pct exec $vmid -- rm -f /tmp/.w 2>/dev/null"; then
    log_ok "VMID $vmid writable"
  else
    log_warn "VMID $vmid /tmp still not writable (may need retry or different fix)"
  fi
done
# Closing summary, framed by blank lines.
printf '\n'
log_ok "Done. Re-run fix/install scripts as needed."
printf '\n'