#!/bin/bash
#---------------------------------*- sh -*-------------------------------------
# ==  == ====== ====   ====    |
#                   \\     ||  | Multiphase Code Repository by HZDR
# ======   //   ||  || ===//   | Website: https://doi.org/10.14278/rodare.767
# ||  ||  //    ||  // || \\   | License: GPL-3.0-or-later
# ==  == ====== ====   ==  ==  |
#------------------------------------------------------------------------------
# License
#     This file is part of the Multiphase Code Repository by HZDR.
#
#     Copyright (C) 2025 by Helmholtz-Zentrum Dresden-Rossendorf e.V. (HZDR),
#     Website: https://hzdr.de
#
#     Multiphase Code Repository by HZDR is based on the free software for
#     computational fluid dynamics (CFD) from the OpenFOAM Foundation.
#     Copyright (C) 2025 by OpenFOAM Foundation, Website: https://openfoam.org
#
#     If you are interested in which files are original OpenFOAM Foundation
#     files, which OpenFOAM Foundation files were modified, and which files were
#     newly created, see FILES.md.
#
#     Multiphase Code Repository by HZDR is free software: you can redistribute
#     it and/or modify it under the terms of the GNU General Public License as
#     published by the Free Software Foundation, either version 3 of the
#     License, or (at your option) any later version.
#
#     Multiphase Code Repository by HZDR is distributed in the hope that it will
#     be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
#     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
#     Public License for more details.
#
#     You should have received a copy of the GNU General Public License along
#     with the Multiphase Code Repository by HZDR. If not, see
#     <http://www.gnu.org/licenses/>.
#
# Description
#     SLURM script to run jobs on an HPC Cluster allowing use of preemption
#     partitions. It allows a simulation to be stopped temporarily when a
#     "high-priority" job accesses the resources. For this to work, the setup
#     needs to be interruptible, i.e. must write checkpoints. The script has
#     not been tested using Apptainer images.
#
#     Required controlDict entries:
#
#        startFrom           latestTime;
#        writeFormat         binary;
#        functions
#        {
#            #includeFunc    stopAtFile(action=writeNow);
#        }
#
#     Required append option in Allrun:
#
#       runParallel -append $application
#
#     Command to completely kill job without resubmission:
#
#       scancel -b -s 9 <JOB-ID>
#
#------------------------------------------------------------------------------

# Partition
#SBATCH --partition=low

# Set number of nodes
#SBATCH --nodes=1

# Set number of tasks per node according to the partition used
#SBATCH --ntasks-per-node=4

# Set account for accessing a specific CPU time allocation
#SBATCH --account=low

# Set name of the job
#SBATCH --job-name=<job_name>

# Mail alert at BEGIN|END|FAIL|ALL
#SBATCH --mail-type=ALL

# Join stdout and stderr and set file name for output
#SBATCH --output=slurm-%j.out

# Append to existing log file in case job is requeued
#SBATCH --open-mode=append

# Signal SIGTERM to batch shell 150s before end of wall time
#SBATCH --signal=B:15@150

# Trap function to end OpenFOAM job on signal
#
# Creates the file "stop" in the case directory ($SLURM_SUBMIT_DIR), which is
# expected to make the stopAtFile function object write the current state and
# end the solver, and then blocks until that file no longer exists. Afterwards
# the job is requeued if at least one more writeInterval fits between the
# latest processor time and endTime.
#
# Globals:
#     SLURM_SUBMIT_DIR (read), SLURM_JOBID (read)
# Returns:
#     1 if endTime, the latest time or writeInterval cannot be determined (the
#     job is not requeued in that case), otherwise the status of the last
#     command executed.
#
# NOTE(review): the comparison assumes that writeInterval is given in units of
# simulation time (time-based writeControl) - confirm for the case at hand.
stopAtWriteNowSignal()
{
    local caseDir="$SLURM_SUBMIT_DIR"
    local stopFile="$caseDir/stop"
    # Non-negative decimal number with optional exponent, e.g. 10, 0.5, 1e-05.
    # Unlike a pattern made only of optional parts, this matches neither an
    # empty line nor a lone "."
    local numberRe='^([0-9]+\.?[0-9]*|\.[0-9]+)([eE]([-+]?)([0-9]+))?$'
    local eT lT wI name value exponent requeue

    echo "Using stopAtFile to end OpenFOAM job"
    touch "$stopFile"
    echo "Waiting for end of OpenFOAM job"
    # No timeout: if nothing removes the file, this loop runs until SLURM
    # kills the job
    while [[ -e $stopFile ]]
    do
        sleep 1
    done
    echo "Data written ... aborting"

    # Take the first line of each tool's output that is a plain number
    eT=$(foamDictionary -expand -entry endTime -value \
        "$caseDir/system/controlDict" | grep -m 1 -E -e "$numberRe")
    lT=$(foamListTimes -functionEntries -processor -latestTime -case \
        "$caseDir" | grep -m 1 -E -e "$numberRe")
    wI=$(foamDictionary -expand -entry writeInterval -value \
        "$caseDir/system/controlDict" | grep -m 1 -E -e "$numberRe")

    for name in eT lT wI
    do
        value=${!name}
        if [[ ! $value =~ $numberRe ]]; then
            echo "Could not determine a numeric value for $name," \
                "not resubmitting job" >&2
            return 1
        fi

        # bc does not understand exponent notation: rewrite <m>e<x> as
        # (<m>*10^(<x>)). With "bc -l" (scale=20), exponents below -20
        # underflow to zero.
        if [[ -n ${BASH_REMATCH[2]} ]]; then
            # 10# forces base 10, so that e.g. "08" is not read as octal
            exponent=$((10#${BASH_REMATCH[4]}))
            if [[ ${BASH_REMATCH[3]} == - ]]; then
                exponent=-$exponent
            fi
            printf -v "$name" '(%s*10^(%s))' "${BASH_REMATCH[1]}" "$exponent"
        fi
    done

    # bc prints 1 if the relation holds and 0 otherwise
    requeue=$(echo "$lT <= ($eT - $wI)" | bc -l)
    if [[ $requeue == 1 ]]; then
        echo "Resubmitting job"
        cd "$caseDir" || exit
        scontrol requeue "$SLURM_JOBID"
    fi
}

# Create a trap to catch signal
# (15 is SIGTERM, the signal requested from SLURM via --signal=B:15@150)
trap stopAtWriteNowSignal 15

# Load modules required for OpenFOAM
module load gcc OpenFOAM/hzdr gnuplot

# Run application
# Allrun is started in the background and awaited with the wait builtin: bash
# defers a trap handler until the current foreground command has finished,
# whereas wait returns as soon as a trapped signal arrives, so the handler
# runs while Allrun is still running. Once the handler returns, the script
# continues after wait and ends.
./Allrun &
wait
