#!/bin/bash

# Syntax: run_parallel_tests.bash [options] <ORAC_P> <n_procs>
#  where <ORAC_P> is the full name of the program 
#  and <n_procs> is the n. of replicas
#   options: -z -> clear temp files and exit
#            -h -> display this help

# ORAC program: do SMD tests in parallel.
# This script has been tested on Linux/mpich2 platforms only. 


# ----------------------------------------------------------------------       
# Help: copy first comments block in this file
# ----------------------------------------------------------------------
# Print the usage text and terminate the script.
# The text is taken from this very file: everything from line 2 through the
# next empty line that follows it, with any literal "$0" replaced by the
# base name of the script.
# Arguments: none.
# Never returns: the script exits with the status of the sed command.
function Help {
    local name
    name=$(basename "$0")
    # "$0" is quoted so that a script path containing blanks is still read
    # as a single file name.
    sed -n "s/\$0/$name/;2,/^\$/p" "$0"
    exit
}


# ----------------------------------------------------------------------
# other functions
# ----------------------------------------------------------------------
# Silently and recursively remove the given files and directories.
# Arguments: the paths to delete. Callers pass an unquoted list so the shell
#            has already expanded any glob patterns; each argument is
#            therefore used exactly as received (no second round of word
#            splitting or globbing). Paths that do not exist are ignored.
# Outputs:   nothing; all rm messages are discarded.
function CleanUpFiles {
    rm -rf -- "$@" > /dev/null 2>&1
}

# ----------------------------------------------------------------------
# init and defaults   
# ----------------------------------------------------------------------

# Change the 'MPIRUN=' line below according to your MPI package.
# The value must end with the launcher's "number of processes" option:
# the script appends the process count and the command to run right after it.
# (a) MPICH2, conforms to MPI-2 recommendation
# mpich2 v.1.0.7 (run 'mpdboot -f mpd.hosts' first; also set up ~/.mpd.conf)
MPIRUN="mpiexec -n"
# mpich2 v.1.4.1
#MPIRUN="mpiexec -f machinefile -n"
#   use -disable-hostname-propagation  if you encounter communication errors
#MPIRUN="mpiexec -f machinefile -disable-hostname-propagation -n"

# OpenMPI
# NOTE(review): this example used to read "mpirun -nd"; Open MPI documents
# -n / -np as the process-count option -- confirm before enabling.
# MPIRUN="mpirun -np"

# Files and directories produced by the tests, removed by CleanUpFiles.
# The variable is expanded UNQUOTED where it is used, so its value is split
# on whitespace and every entry is glob-expanded relative to the current
# directory. "./works*" also covers the works_* files, so no separate
# entry is needed for them.
TEMP_FILES="
./*.tpgprm
./works*
./bin/fes
./forward.dat
./FORWARD/*
./INPUT
./OUT_PARALLEL_TEST
./PAR????
./RESTART_A/*
./RESTART_B/*
./REVERSE/*
./tmp*
"


# ----------------------------------------------------------------------
# command line options
# ----------------------------------------------------------------------
while getopts "hz" opt; do
    case "$opt" in
        z)
            # -z: only remove the temporary files, then stop
            CleanUpFiles $TEMP_FILES
            exit
            ;;
        *)
            # -h and any unrecognised option: show the usage text
            # (Help terminates the script)
            Help
            ;;
    esac
done

# Drop the options that were just parsed: $1 now refers to the first
# non-option argument given on the command line, if there is one.
shift $((OPTIND - 1))

# ----------------------------------------------------------------------
# command line arguments
# ----------------------------------------------------------------------
# Two positional arguments are required: the ORAC program and the number of
# processes. Otherwise show the usage text.
if (( $# < 2 )); then
    # Must be this script's Help function: lowercase "help" is the Bash
    # builtin, which lists the shell's own builtins instead of the usage.
    Help
    exit 1      # only reached if Help ever returns
fi
ORAC_P=$1       # program to test
NPROCS=$2       # number of replicas / MPI processes


# ----------------------------------------------------------------------
# initial checks and cleanup
# ----------------------------------------------------------------------

#
# make sure all needed programs are executable and on the $PATH
#
# The MPI launcher is the first word of $MPIRUN, so the check follows
# whatever launcher has been configured instead of assuming mpiexec.
# (An earlier variant of this check also listed mpdallexit and mpdboot.)
MPI_LAUNCHER=${MPIRUN%% *}

for prog in "$MPI_LAUNCHER" "$ORAC_P"; do
    if ! command -v "$prog" >/dev/null 2>&1; then
        {
            echo "ERROR: \"$prog\" not found."
            echo "       This is needed by $0 to work. Check your"
            echo "       \$PATH variable or install program \"$prog\"."
        } >&2
        exit 2
    fi
done

#
# files cleanup
#
# $TEMP_FILES is left unquoted on purpose: it holds a whitespace-separated
# list of glob patterns that the shell has to split and expand here.
CleanUpFiles $TEMP_FILES

#
# cleanup any MPI process left, and initialize MPI  (MPICH2 only)
#

# killall -9 $ORAC_P >&/dev/null
##        kill mpd demon 
# mpdallexit 
##        start the mpd demon on hosts 
#echo "starting the mpd demon" 
# mpdboot -n  `cat mpd.hosts | wc -l` && ( echo "Starting the program on these hosts:"; cat mpd.hosts);

# ----------------------------------------------------------------------
#   TESTS
# ----------------------------------------------------------------------

# First test
# generate state A and B starting equilibrium distributions
# state A phase space points in RESTART_A directory
# state B phase space points in RESTART_B directory
#
# Each progress message goes both to the terminal and to OUT_PARALLEL_TEST.
echo "Testing a.in: generating starting ghost solute configurations..." | tee -a OUT_PARALLEL_TEST
# -f: never stop to ask for confirmation (the prompt would be invisible,
# since all rm output is discarded).
rm -f RESTART_A/* RESTART_B/* >& /dev/null
rm -fr PAR* >& /dev/null
# Single-process run; $MPIRUN is deliberately unquoted because it holds the
# launcher together with its options.
${MPIRUN} 1 "$ORAC_P" < a.in >& tmpa
grep "Total    = " PAR0001/out0001 >> OUT_PARALLEL_TEST

# Keep the .tpgprm file written by the run in the current directory.
mv PAR0001/hexa_jpcb.tpgprm .

# Same procedure for b.in.
echo "Testing b.in: generating starting full solute configurations..." | tee -a OUT_PARALLEL_TEST
rm -fr PAR* >& /dev/null
${MPIRUN} 1 "$ORAC_P" < b.in >& tmpb
grep "Total    = " PAR0001/out0001 >> OUT_PARALLEL_TEST
mv PAR0001/hexa_jpcb.tpgprm .

# Third test
# Run the switch-on and the switch-off jobs on $NPROCS processes and move the
# work file found in every PAR* directory to SWITCHON/ or SWITCHOFF/.

# Remove the PAR* directories, running rm through the MPI launcher.
# NOTE(review): presumably launched under MPI so that every node cleans its
# own copy of the working directory -- confirm.
function CleanParDirs {
    printf 'cleaning... '
    # PAR* is expanded by this shell, before the launcher is started.
    ${MPIRUN} "$NPROCS" rm -fr PAR* >& /dev/null
    printf 'done.\n'
}

# Run one switching job in parallel and collect its work files.
# Arguments: $1 - direction word used in the messages ("on" or "off")
#            $2 - ORAC input file
#            $3 - file receiving the job's stdout and stderr
#            $4 - name of the work file inside each PAR* directory
#            $5 - directory that receives the work files
function RunSwitch {
    local direction=$1 input=$2 output=$3 workfile=$4 destdir=$5
    local pardir

    echo " Switching ${direction} of Elte in liquid hexane..."  >> OUT_PARALLEL_TEST
    echo " Switching ${direction} of Elte in liquid hexane..."
    echo ' Starting job on ' "$NPROCS" ' processors'
    ${MPIRUN} "$NPROCS" "$ORAC_P" < "$input" > "$output" 2>&1

    mkdir -p "$destdir"
    for pardir in PAR*; do
        [[ -e $pardir ]] || continue    # the glob matched nothing
        mv "$pardir/$workfile" "$destdir/$workfile.$pardir"
    done
    CleanParDirs
}

rm -f works_* >& /dev/null
CleanParDirs

# generate  $NPROCS switch-on work measurements in parallel
RunSwitch on 1_para.in 1_para.out on.wrk SWITCHON

# generate  $NPROCS switch-off work measurements in parallel
RunSwitch off 2_para.in 2_para.out off.wrk SWITCHOFF


# collect work data in one file for forward and one file for backward trajectory:
# the 6th field of the last line of every work file is appended to
# works_forward (SWITCHON/on*) or works_reverse (SWITCHOFF/off*).
echo "collecting forward and reverse alchemical work"
for wrk in SWITCHON/on*; do
    [[ -e $wrk ]] || continue           # the glob matched nothing
    tail -n 1 "$wrk" | awk '{print  $6}' >> works_forward
done
for wrk in SWITCHOFF/off*; do
    [[ -e $wrk ]] || continue
    tail -n 1 "$wrk" | awk '{print  $6}' >> works_reverse
done
echo "Switch on works"  >> OUT_PARALLEL_TEST
cat works_forward   >> OUT_PARALLEL_TEST
echo "Switch off works"  >> OUT_PARALLEL_TEST
cat works_reverse   >> OUT_PARALLEL_TEST


# Launch MA and CP PMF reconstruction from bidirectional data
### bin/fes >> OUT_PARALLEL_TEST
# The message text (spelling included) is left untouched: it is written to
# OUT_PARALLEL_TEST, which is later compared with the reference OUT_PARALLEL.
echo "Printing free Energy of alchemical swithcON of N-ELTE378 in hexane." | tee -a OUT_PARALLEL_TEST

./bennett >> OUT_PARALLEL_TEST

# ----------------------------------------------------------------------



echo "Doing extra tests on how to restart an alchemical simulation in parallel"

# First leg: run test_rest0.in on $NPROCS processes and keep the last line of
# every replica's on.wrk. Messages go to the terminal and to
# OUT_PARALLEL_TEST; their text is kept as is because that file is later
# compared with the reference OUT_PARALLEL.
echo " Test on restarting an alchemical transformation in parallel" | tee -a OUT_PARALLEL_TEST
echo 'Starting job on ' "$NPROCS" ' processors' | tee -a OUT_PARALLEL_TEST
echo " Switching on of Elte in liquid hexane..." | tee -a OUT_PARALLEL_TEST
${MPIRUN} "$NPROCS" "$ORAC_P" < test_rest0.in >& test_rest0.out
for pardir in PAR*; do
    tail -n 1 "$pardir/on.wrk" >> works_with_restart1
done

# Second leg: run test_rest1.in and again keep the last line of every on.wrk.
# NOTE(review): the messages say "off" although on.wrk is read and the
# results are labelled "Switch on works" below -- confirm the intended wording
# against the reference output before changing anything.
echo 'Starting job off ' "$NPROCS" ' processors' | tee -a OUT_PARALLEL_TEST
echo " Switching off of Elte in liquid hexane..." | tee -a OUT_PARALLEL_TEST
${MPIRUN} "$NPROCS" "$ORAC_P" < test_rest1.in >& test_rest1.out
for pardir in PAR*; do
    tail -n 1 "$pardir/on.wrk" >> works_with_restart2
done
printf 'cleaning... '
${MPIRUN} "$NPROCS" rm -fr PAR* >& /dev/null
printf 'done.\n'


echo "Switch on works 0-1000"  >> OUT_PARALLEL_TEST
cat works_with_restart1   >> OUT_PARALLEL_TEST
echo "Switch on works 1000-2000"  >> OUT_PARALLEL_TEST
cat works_with_restart2   >> OUT_PARALLEL_TEST


# Now check if tests are OK (or almost OK)
# Everything diff prints -- the differences as well as its own error
# messages -- is stored in tmp.diff.
diff OUT_PARALLEL_TEST OUT_PARALLEL >& tmp.diff

echo "** TEST COMPLETED **"

# A non-empty tmp.diff means the output does not match the reference.
if [[ -s tmp.diff ]]; then
    printf '%s\n' \
        "-------------------------------------------------------------------" \
        " Warning - your output differs from reference:" \
        " compare OUT_PARALLEL_TEST to reference OUT_PARALLEL, " \
        " or you may want to run individual tests with e.g. \`make 2b.out'" \
        "" \
        " Please read the NOTE on tests in the README_PARALLEL file" \
        "-------------------------------------------------------------------"
fi

# A mismatch is only reported as a warning: the exit status is 0 either way.
exit 0
