- Imagine the power goes down for a short amount of time but already one of your Proxmox hosts gets powered down. What if our script was to detect the change and then based on the previously saved flags trigger a wake-on-LAN call to wake that host (or those hosts) back up?
- In my case, I actually have the RPi connected to a different ‘Router UPS’ that lasts much longer (around 2-3 hours), so if the power comes back before the Pi is off, it can switch everything back on.
- 2x pre-requisites:
- To make it work, you will need to allow WoL in the BIOS of each of your Proxmox hosts – if it is not set already, you will need to plug a screen/keyboard into the host, reboot it, and enable “Wake-on-LAN,” “Power On by PCIE” or a similarly named option.
- You will also need to locate the MAC address of the management port of each of your Proxmox hosts. You can SSH in, run `ip addr`, and find the interface that you use to connect to the web GUI. Typically, that would be the MAC address of your `vmbr0` interface for LAN.
- Install the required package on the RPi:
sudo apt install wakeonlan
- We will need to modify two scripts:
- Our `ups_manager.sh` script, to record the MAC address of each Proxmox host we want to be able to wake with WoL if power comes back before a total shutdown.
- The `offbattery` script, to extract those MAC addresses and send a WoL packet to each host that had the shutdown flag set (before that flag is deleted).
- Update the `ups_manager.sh` script:
sudo nano /usr/local/sbin/ups_manager.sh

#!/bin/bash
#================================================
#             UPS SHUTDOWN MANAGER              #
#================================================
# Reads the UPS state from apcaccess. While the UPS is on battery, each
# Proxmox host is shut down (guests first) once the battery charge falls
# to that host's threshold; thresholds are spread evenly from
# START_PERCENT (first host) down to END_PERCENT (last host). At
# PI_SHUTDOWN_PERCENT this machine shuts itself down as well.
# A flag file per host records that its shutdown was already requested,
# so repeated runs do not trigger it again.

# --- User Configuration ---
EMAIL_TO="[email protected]"
PROXMOX_HOSTS=(
  "192.168.8.4" # Proxmox2
  "192.168.8.3" # Proxmox1
)
START_PERCENT=80
END_PERCENT=30
PI_SHUTDOWN_PERCENT=10

# --- System Configuration ---
LOG_FILE="/var/log/ups_manager.log"
FLAG_DIR="/tmp/ups_shutdown_flags"
PI_FLAG_FILE="${FLAG_DIR}/pi_shutdown.flag"

# --- Add MAC addresses for each host for Wake-on-LAN
# NOTE: this map is not read by this script. The offbattery script pulls
# these assignment lines out of this file with grep, so keep exactly one
# assignment per line.
declare -A PROXMOX_MACS
PROXMOX_MACS["192.168.8.4"]="40:62:31:0a:d8:f5" # Proxmox2 MAC
PROXMOX_MACS["192.168.8.3"]="00:b0:b7:e0:01:f8" # Proxmox1 MAC

# --- Cron-safe paths ---
APCACCESS_CMD="/usr/sbin/apcaccess"
GREP_CMD="/usr/bin/grep"
AWK_CMD="/usr/bin/awk"
BC_CMD="/usr/bin/bc"
SSH_CMD="/usr/bin/ssh"

#================================================
#                   FUNCTIONS                   #
#================================================

# Print a timestamped message to stdout and append it to LOG_FILE.
#   $1 - message text
log_message() {
  echo "$(date '+%Y-%m-%d %H:%M:%S') - $1" | tee -a "$LOG_FILE"
}

# Send a plain-text email to EMAIL_TO through mutt and log that it was sent.
#   $1 - subject
#   $2 - body
send_email() {
  local subject="$1"
  local body="$2"
  echo "$body" | mutt -s "$subject" "$EMAIL_TO"
  log_message "Email sent to $EMAIL_TO: $subject"
}

# Shut down all guests on one Proxmox host over SSH, then the host itself.
#   $1 - IP address of the Proxmox host
# The whole remote sequence is passed as a single-quoted string, so every
# $... inside it is expanded on the Proxmox host, not here. Guests get a
# fixed grace period and are then stopped forcefully, so a stuck guest
# cannot hold up the host shutdown.
graceful_shutdown_host() {
  local HOST_IP=$1
  log_message "Attempting graceful (non-blocking) shutdown of $HOST_IP..."

  # Inside the remote double quotes the awk field must be written \$1:
  # the remote shell turns that into a literal $1 for awk instead of
  # expanding its own (empty) first positional parameter.
  "$SSH_CMD" -i /root/.ssh/upsmanage_rsa -o ConnectTimeout=10 "root@${HOST_IP}" '
    log_msg() { echo "$(date): $1"; }
    log_msg "Received shutdown signal from UPS manager."

    # 1. Gracefully shut down all running VMs
    log_msg "Sending shutdown signal to all QEMU VMs..."
    for vmid in $(qm list | grep running | awk "{print \$1}"); do
      qm shutdown $vmid
    done

    # 2. Wait 5 minutes (300 seconds) for VMs to shut down
    log_msg "Waiting 300 seconds for graceful VM shutdown..."
    sleep 300

    # 3. Forcefully stop any VMs still running (like stuck Windows VMs)
    log_msg "Forcing shutdown of any remaining VMs..."
    for vmid in $(qm list | grep running | awk "{print \$1}"); do
      log_msg "VM $vmid is stuck. Forcing stop."
      qm stop $vmid
    done

    # 4. Gracefully shut down all running containers
    log_msg "Sending shutdown signal to all LXC Containers..."
    for ctid in $(pct list | grep running | awk "{print \$1}"); do
      pct shutdown $ctid
    done

    # 5. Wait 2 minutes (120 seconds) for containers
    log_msg "Waiting 120 seconds for containers to stop..."
    sleep 120

    # 6. Forcefully stop any containers still running
    log_msg "Forcing shutdown of any remaining containers..."
    for ctid in $(pct list | grep running | awk "{print \$1}"); do
      log_msg "Container $ctid is stuck. Forcing stop."
      pct stop $ctid
    done

    # 7. Shut down the Proxmox host
    log_msg "All guests stopped. Shutting down Proxmox host now."
    shutdown -h now
  '
}

#================================================
#                 SCRIPT LOGIC                  #
#================================================

mkdir -p "$FLAG_DIR"
log_message "Script started. Checking UPS status..."

# --- Get UPS Status ---
if ! APC_OUTPUT=$("$APCACCESS_CMD"); then
  log_message "FATAL: 'apcaccess' command failed. Is apcupsd running?"
  exit 1
fi

UPS_STATUS=$(echo "$APC_OUTPUT" | $GREP_CMD "STATUS" | $AWK_CMD '{print $3}')
# Use BCHARGE per your discovery; keep only the integer part (e.g. 87.0 -> 87)
BATT_PERCENT=$(echo "$APC_OUTPUT" | $GREP_CMD "BCHARGE" | $AWK_CMD '{print $3}' | cut -d'.' -f1)

if [ "$UPS_STATUS" != "ONBATT" ]; then
  log_message "UPS is on line power ($UPS_STATUS). No action needed."
  exit 0
fi

# Every comparison below is an integer test; refuse to continue on an
# empty or non-numeric reading instead of failing each test silently.
if ! [[ "$BATT_PERCENT" =~ ^[0-9]+$ ]]; then
  log_message "FATAL: Could not read a numeric BCHARGE value from apcaccess (got '${BATT_PERCENT}')."
  exit 1
fi

log_message "WARNING: UPS is on battery! Current level: ${BATT_PERCENT}%"

# --- Dynamic Threshold Calculation ---
# One threshold per host, evenly spaced from START_PERCENT to END_PERCENT.
declare -a THRESHOLDS
HOST_COUNT=${#PROXMOX_HOSTS[@]}
if [ "$HOST_COUNT" -eq 1 ]; then
  THRESHOLDS=("$START_PERCENT")
else
  RANGE=$((START_PERCENT - END_PERCENT))
  INTERVALS=$((HOST_COUNT - 1))
  STEP=$(echo "scale=4; $RANGE / $INTERVALS" | $BC_CMD)
  for (( i=0; i<HOST_COUNT; i++ )); do
    THRESH=$(echo "scale=4; $START_PERCENT - ($i * $STEP)" | $BC_CMD)
    # Round to the nearest whole percent for the integer tests below.
    THRESHOLDS[$i]=$(printf "%.0f" "$THRESH")
  done
fi
log_message "Calculated shutdown thresholds: ${THRESHOLDS[*]}"

# --- Check Proxmox Hosts ---
for (( i=0; i<${#PROXMOX_HOSTS[@]}; i++ )); do
  HOST_IP=${PROXMOX_HOSTS[$i]}
  HOST_THRESHOLD=${THRESHOLDS[$i]}
  FLAG_FILE="${FLAG_DIR}/host_${HOST_IP}.flag"

  if [ "$BATT_PERCENT" -le "$HOST_THRESHOLD" ] && [ ! -f "$FLAG_FILE" ]; then
    log_message "TRIGGER: Battery at ${BATT_PERCENT}%. Threshold of ${HOST_THRESHOLD}% met for ${HOST_IP}."
    # The flag is created before the shutdown is attempted, so a later
    # run never sends the command to this host a second time.
    touch "$FLAG_FILE"

    SUBJECT="UPS ALERT: Shutting down Proxmox Host ${HOST_IP}"
    BODY="UPS battery level reached ${BATT_PERCENT}%. Triggering graceful (non-blocking) shutdown for Proxmox host at ${HOST_IP} (Threshold: ${HOST_THRESHOLD}%)."
    send_email "$SUBJECT" "$BODY"

    # Call shutdown function in the background so the remaining hosts
    # are not held up by the remote wait periods.
    graceful_shutdown_host "$HOST_IP" &
  elif [ -f "$FLAG_FILE" ]; then
    log_message "INFO: Shutdown command for ${HOST_IP} already sent."
  fi
done

# --- Check Raspberry Pi Self-Shutdown ---
if [ "$BATT_PERCENT" -le "$PI_SHUTDOWN_PERCENT" ] && [ ! -f "$PI_FLAG_FILE" ]; then
  log_message "CRITICAL: Battery at ${BATT_PERCENT}%. Shutting down Raspberry Pi."
  touch "$PI_FLAG_FILE"
  sudo shutdown -h now
fi

log_message "The script has finished."
- And then we will need to update our `offbattery` script:
nano /etc/apcupsd/offbattery

#!/bin/bash
#
# apcupsd "offbattery" event script.
#
# 1. Reads the host list, the MAC map and the flag directory out of
#    ups_manager.sh.
# 2. Sends a Wake-on-LAN packet to every host that still has a shutdown
#    flag file, i.e. every host ups_manager.sh asked to shut down.
# 3. Emails a report with the current UPS status and the recent log.
# 4. Deletes the flag files so the next outage starts from a clean state.
#
# NOTE(review): a flagged host may still be working through its shutdown
# sequence (ups_manager.sh waits up to 300s + 120s for guests) when this
# runs; a WoL packet presumably has no effect on a machine that has not
# powered off yet - confirm whether a delayed retry is needed.

# Variables
MAIL_TO="[email protected]"
LOG_FILE="/var/log/ups_manager.log"
SUBJ="Power restored for $(hostname)"
CONFIG_FILE="/usr/local/sbin/ups_manager.sh"
WAKE_CMD="/usr/bin/wakeonlan"
# Used for flag cleanup when FLAG_DIR could not be read from CONFIG_FILE.
FALLBACK_FLAG_DIR="/tmp/ups_shutdown_flags"

# Filled in by load_config. The MAC map must be declared associative
# before the assignments are evaluated, otherwise the IP keys would be
# treated as arithmetic indexes.
declare -a PROXMOX_HOSTS=()
declare -A PROXMOX_MACS=()
FLAG_DIR=""

# Append a timestamped line to LOG_FILE.
#   $1 - message text
log_line() {
  echo "$(date) - $1" >> "$LOG_FILE"
}

# Copy the host array, the MAC assignments and FLAG_DIR from CONFIG_FILE
# into this shell. CONFIG_FILE cannot simply be sourced because that
# would run the whole UPS manager, so only the needed definitions are
# extracted and evaluated.
# Each eval argument is quoted so line breaks survive: the host array
# spans several lines, and every line may end in a "# comment" that must
# only comment out the rest of its own line.
# SECURITY: this evaluates text from CONFIG_FILE; that file must only be
# writable by root.
load_config() {
  local hosts_def
  # Print from the line that opens the host array up to the first line
  # whose non-comment part contains the closing parenthesis.
  hosts_def=$(awk '
    /^[ \t]*PROXMOX_HOSTS=\(/ { grab = 1 }
    grab {
      print
      code = $0
      sub(/#.*/, "", code)
      if (code ~ /\)/) exit
    }
  ' "$CONFIG_FILE")
  eval "$hosts_def"
  eval "$(grep -E '^[ 	]*PROXMOX_MACS\[' "$CONFIG_FILE")"
  eval "$(grep -E '^[ 	]*FLAG_DIR=' "$CONFIG_FILE")"
}

# Send a WoL packet to every configured host that has a shutdown flag.
wake_flagged_hosts() {
  local host_ip flag_file host_mac
  for host_ip in "${PROXMOX_HOSTS[@]}"; do
    flag_file="${FLAG_DIR}/host_${host_ip}.flag"
    # No flag: this host was not shut down by ups_manager.sh.
    [[ -f "$flag_file" ]] || continue

    # Direct associative-array lookup; empty when the IP has no entry.
    host_mac="${PROXMOX_MACS[$host_ip]:-}"
    if [[ -n "$host_mac" && "$host_mac" != "00:00:00:00:00:00" ]]; then
      log_line "Waking up $host_ip (MAC: $host_mac)..."
      "$WAKE_CMD" "$host_mac"
    else
      log_line "ERROR: No valid MAC address found for $host_ip. Cannot wake."
    fi
  done
}

# Build the HTML report in a private temporary file and mail it with the
# log file attached.
send_report() {
  local mail_body
  # mktemp instead of a fixed name: this runs as root and /tmp is
  # world-writable.
  if ! mail_body=$(mktemp /tmp/power_restored.XXXXXX); then
    log_line "ERROR: Could not create a temporary file for the email body."
    return 1
  fi

  # Create HTML email body
  cat > "$mail_body" << EOF
<html>
<body>
<h2>Good news, power was restored!</h2>
<p>Wake-on-LAN commands have been sent to any hosts that were shut down.</p>
<p><strong>APC status just after restoration:</strong>
<pre>$(/usr/sbin/apcaccess status)</pre>
<p><strong>Recent log file output (including WoL attempts):</strong>
<pre>$(tail -n 20 /var/log/ups_manager.log)</pre>
<p>Your RPi script :)</p>
</body>
</html>
EOF

  # Send the email. Attachments go last, separated from the recipient
  # by "--", as the mutt manual requires.
  mutt -e 'set content_type="text/html"' \
    -s "$SUBJ" \
    -a "$LOG_FILE" -- \
    "$MAIL_TO" < "$mail_body"

  rm -f -- "$mail_body" # Remove our temporary file
}

# We do this *before* sending the email, so the log file is up-to-date.
if [[ -f "$CONFIG_FILE" ]]; then
  load_config
  log_line "Power restored. Checking for hosts to wake up."
  if [[ -n "$FLAG_DIR" && -n "${PROXMOX_HOSTS[0]:-}" ]]; then
    wake_flagged_hosts
  else
    log_line "ERROR: Could not read arrays or FLAG_DIR from $CONFIG_FILE."
  fi
else
  log_line "ERROR: Could not find $CONFIG_FILE to source for WoL."
fi

send_report

# Clear the flags *after* checking them and sending the email
log_line "Clearing all shutdown flags."
# Fall back to the original path just in case config reading failed.
flag_dir="${FLAG_DIR:-$FALLBACK_FLAG_DIR}"
rm -f -- "${flag_dir}"/host_*.flag "${flag_dir}/pi_shutdown.flag"

exit 0