#!/bin/bash 
# this is a wrapper to compute ligand orac topology and
# pdf file using primadorac for type assignment and mopac7
# for ESP charges (AM1)
#

#  CONTROL PARAMETERS AND DEFINITIONS ###############################################

# Run-mode switches (defaults; may be changed by the command-line options).
DEBUG=1                           # 1: terse one-line status codes; 0: html messages (www mirror)
WWW=0                             # 1: www mirror mode (IR spectrum, tar.gz archive, cleanup)
geomforce=0                       # 1: second assignment attempt reuses the MOPAC geometry

#######################################
# Parse the option switches found in the given argument list.
#   -f   sets geomforce=1
#   -d   sets DEBUG=0
#   -w   sets WWW=1 and DEBUG=0
#   -r   is accepted by the option string but has no effect
# An unknown switch only prints "invalid option"; parsing goes on.
# Globals:   geomforce, DEBUG, WWW (written); OPTIND is left pointing at
#            the first argument that is not a switch.
# Arguments: the script's command-line words
#######################################
parse_options() {
    local opt
    OPTIND=1
    while getopts ":fdwr" opt; do
        case "$opt" in
            f)
                geomforce=1
                ;;
            d)
                DEBUG=0
                ;;
            w)
                WWW=1
                DEBUG=0
                ;;
            \?)
                echo "invalid option"
                ;;
        esac
    done
}

parse_options "$@"
# Drop the switches that were just parsed: the argument-count check further
# down then sees only the input file, so separate switches such as
# "-f -w file" are accepted exactly like the bundled form "-fw file".
shift $((OPTIND - 1))

# Locations of the external programs and data files used by this wrapper.
# Everything is resolved relative to the ORAC source tree in orac_home.
orac_home=/home/procacci/ORAC/trunk
mopac_exe=$orac_home/tools/primadorac/mopac7-1.11/src/mopac7   # MOPAC7 binary
orac_exe=$orac_home/src/GNU/orac                               # ORAC binary
primadorac_exe=$orac_home/tools/primadorac/src/primadorac      # atom-type assignment
rms90_exe=$orac_home/tools/primadorac/www/new_rms              # used by rmsmean
GAFF=$orac_home/tools/primadorac/www/gaff2016.prm              # parameter file given to ORAC
Oin=$orac_home/tools/primadorac/sdf/0.in                       # ORAC input template
ir=$orac_home/tools/primadorac/www/ir.awk                      # www mode only
irg=$orac_home/tools/primadorac/www/ir.gplt                    # www mode only
readme=$orac_home/tools/primadorac/www/README.text             # copied into README.txt

# check dependencies
dep=0

# Report a missing dependency and remember the failure in $dep.
#   $1 - label printed in the message
#   $2 - path that must exist as a regular file
require_file() {
    if [ ! -f "$2" ]; then
        echo " $1 not found in $2"
        dep=1
    fi
}

require_file "mopac7 executable"     "$mopac_exe"
require_file "orac executable"       "$orac_exe"
require_file "primadorac executable" "$primadorac_exe"
require_file "0.in"                  "$Oin"
# Test the very path stored in GAFF: that is the file handed to ORAC later.
require_file "gaff2016.prm"          "$GAFF"
require_file "new_rms"               "$rms90_exe"
for helper in aug-bnd.awk aug-ang.awk aug-pto.awk aug-pit.awk upd.awk; do
    require_file "$helper" "$orac_home/tools/primadorac/www/$helper"
done

# Stop with a failure status when anything is missing.
if [ "$dep" != "0" ]; then
    exit 1
fi

#######################################
# Run the MOPAC7 executable on one job in the current directory.
# Globals:   mopac_exe (read).  FOR005, FOR006, FOR009, FOR010, FOR011 and
#            FOR012 are exported with the job's file names and stay exported
#            after the call.
# Arguments: $1 - job base name (the input file is $1.dat)
# Outputs:   the program's standard output is written to $1.OUT
# Returns:   the exit status of the MOPAC7 executable
#######################################
function run_mopac {
    local job=$1
    local status

    export FOR005="$job.dat"	## input
    export FOR006="$job.out"	## output
    export FOR009="$job.res"	## restart
    export FOR010="$job.den"	## density matrix
    export FOR011="$job.log"	## logfile
    export FOR012="$job.arc"	## archive/summary file

    # Quoted so that a job name containing blanks does not produce an
    # "ambiguous redirect".
    "$mopac_exe" > "$job.OUT"
    status=$?

    # A SHUTDOWN file may be left behind in the working directory
    # (presumably by MOPAC itself -- TODO confirm); remove it if present.
    if [ -f SHUTDOWN ]; then
        rm -f SHUTDOWN
    fi
    return "$status"
}

#  FUNCTIONS ##########################################################################

#######################################
# Print the user-facing text that belongs to a message key.
# Most texts carry html markup because they are shown by the www mirror.
# Globals:   name, format, natoms, babelout (read)
# Arguments: $1     - message key (Nofile, Filenot, AliasR, beyondN,
#                     Mopacfails, OriginOK, Augmenting, MopacOK, Err0, Err1,
#                     ErrMopac, AugOrig, domean)
#            $2..$5 - for "Augmenting" only: files whose content is dumped
#                     inside a <pre> block
# Outputs:   the message on stdout; "domean" additionally calls rmsmean
# An unknown or empty key prints nothing.
#######################################
function messages {
    # A single quoted dispatch: an empty or missing key no longer makes
    # every test fail with "unary operator expected".
    case "$1" in
        Nofile)
            echo " "
            echo " "
            echo "This script computes the ORAC topology file from and input pdb file"
            echo " "
            echo "Syntax: primadorac [opt] file"
            echo "        where 'file' is the pdb file of the ligand (sdf/xyz/pdb formats)"
            echo " "
            echo "Options"
            echo ""
            echo "   -f "
            echo "      force use of the Mopac Geom when some parameters are not found in GAFF2"
            echo ""
            echo "   -w "
            echo "      for www mirroring (produces the tar.gz archive in molecules dir) "
            echo ""
            echo "   -d "
            echo "      set DEBUG to 0 (produce html messages as in www mirroring )"
            echo ""
            echo ""
            echo " N.B. -The charge of the compound must be specified in the last two characters "
            echo "       of the name preceding the format specification: "
            echo "       e.g. acetic_acid-1.sdf --> charge is assumed -1 electrons"
            echo "       e.g. ammonium+1.sdf    --> charge is assumed +1 electrons"
            echo " "
            ;;
        Filenot)
            echo "ERR: no file found/uploaded "
            ;;
        AliasR)
            echo "ERR: Unknown " $babelout "found when babeling <br> "
            ;;
        beyondN)
            echo "ERR (primadorac): Can't go on: more than 1 molecule or conn.gt.7 or type 'x' in <br> " $name.$format
            ;;
        Mopacfails)
            echo "..something awry with MOPAC Optimization; <br> ..redo assignment with original coordinates .. <br>"
            ;;
        OriginOK)
            echo "  --->Assignment OK with original geometry <br>"
            echo " <br> <br> ---> results for <b> " $name " </b> can be found at  <a href='molecules/$name.tar.gz'> here </a> as a compressed tar archive"
            ;;
        Augmenting)
            echo " <br> <br>  ** ERROR ** Something went wrong with orac miminization.<br>"
            echo "             ----prm file cat not produced or empty        <br> "
            echo "         <b> ----Could be missing parameters in gaff2016.</b>  <br> "
            echo "             ----ORAC output follows <br><br><br>"

            echo "<pre>"
            # Dump up to four files (arguments 2 to 5).
            cat "${@:2:4}"
            echo "</pre>"
            ;;
        MopacOK)
            echo "  --->Assignment OK with MOPAC7 geometry <br>"
            echo " <br> <br> ---> results for <b> " $name " </b> can be found at  <a href='molecules/$name.tar.gz'> here </a> as a compressed tar archive"
            ;;
        Err0)
            echo "ERR: found ZERO atoms in the file. <br>            "
            echo "    is this a pdb/sdf/xyz/mdl/mol2/gau file?                    "
            ;;
        Err1)
            echo "ERR:   two many atoms in the file. natoms =" $natoms "<br>"
            echo " maximum allowed is natoms=110         "
            ;;
        ErrMopac)
            echo "ERR: something wrong with mopac; check the file <a href=$name.OUT> $name.OUT </a>"
            ;;
        AugOrig)
            echo "  <br> --->Assignment OK with augmented parameters and original geometry <br>"
            echo " <br> <br> ---> results for <b> " $name " </b> can be found at  <a href='molecules/$name.tar.gz'> HERE </a> as a compressed tar archive"
            ;;
        domean)
            echo " <br> Confidence level (see README.txt in the archive)"
            rmsmean
            ;;
    esac
}
function rmsmean {
    cat $readme > README.txt
    echo "  "  >> README.txt
    echo "  "  >> README.txt
    echo " The following is the mean square root displacement in Angs between ">> README.txt
    echo " the ORAC/GAFF2016 minimized structure and the MOPAC7 minimized structure or ">> README.txt
    echo " the original structure in case assignment based on the MOPAC7 optimized" >> README.txt 
    echo " structure failed"  >> README.txt
    echo "  "  >> README.txt
    grep ATOM $name-min-p.pdb > tmp.min
    meand=`$rms90_exe  $name-p.pdb  tmp.min | grep meanRMS` 
    echo $meand >> README.txt
    echo $meand 
}

#############################execution starts HERE ###############################################

# One or two command-line words are accepted: the input file, optionally
# preceded by one option word.
if [ $# -ne 1 ] && [ $# -ne 2 ]; then
    messages Nofile
    exit
fi

# The input file is always the last command-line word.
filename=${!#}
if [ ! -f "$filename" ]; then
    messages Filenot
    exit
fi
#
# find format and charges
#
curdir=`pwd`/

# The format is the text after the last dot; a name without any dot has no
# format at all instead of borrowing its last three characters.
case "$filename" in
    *.*) format=${filename##*.} ;;
    *)   format="" ;;
esac

# Remove only the trailing ".format"; an identical string elsewhere in the
# name is left untouched.
name=${filename%."$format"}

formats=(sdf pdb mdl mol2 xyz gau)

OK=0
for known in "${formats[@]}"; do
    if [ "$known" == "$format" ]; then OK=1; fi
done
if [ "$OK" != "1" ]; then
    echo "\"$format\"" " unrecognized format"
    echo " allowed format extensions are :" "'.xyz' "  "'.sdf' " "'.pdb'" "'.mdl'" "'.mol2'" "'.gau'" 
    exit
fi

# The net charge may be encoded in the last two characters of the name, e.g.
# acetic_acid-1.sdf or ammonium+1.sdf.  Only a sign followed by a digit is
# taken as a charge, so names such as "vitamin-c" stay uncharged.
test_charge=""
if [ ${#name} -ge 2 ]; then
    test_charge=${name:$(( ${#name} - 2 ))}
fi
charge="nil"
case "$test_charge" in
    [-+][0-9]) charge=$test_charge ;;
esac
#
#  standardize input according to babel
#

# Normalise the line endings in place, then let babel write a PDB copy of the
# input; babel's messages are collected in the scratch file x.0.
dos2unix "$name.$format" >& /dev/null
babel "$name.$format" "$name.pdb"  >& x.0

# First word of every babel message line that contains "interpreted".
babelout=$(grep "interpreted" x.0 | awk '{print $1}')
rm x.0

# Quoted test: several matching lines must not turn the check into a
# "too many arguments" error that would silently skip this exit.
if [ -n "$babelout" ]; then
    if [ "$DEBUG" == "1" ]; then
	printf "%10s\n"  " AliasR "
    else
	messages AliasR
    fi
    exit
fi

# do a simple test to check whether the file contains more than 1 mol
# (primadorac is run on the PDB copy; a non-empty second field on the last
# line of its output is treated as fatal)
fatal=$("$primadorac_exe" < "$name.pdb" | tail -n 1 | awk '{print $2}')

if [ -n "$fatal" ]; then
    if [ "$DEBUG" == "1" ]; then
	printf "%10s\n"  " beyondN "
    else
	messages beyondN
    fi
    exit
fi

# check for charges and reset it if found in sdf
#
# Every "M  CHG" property line lists (atom, charge) pairs after the entry
# count, so the charges sit in fields 5, 7, 9, ...  All of them, on all such
# lines, are added up; a single "M  CHG  1 ..." line gives the same value as
# reading its fifth field.
chargesdf=$(awk '$1 == "M" && $2 == "CHG" {
                     for (k = 5; k <= NF; k += 2) total += $k
                     seen = 1
                 }
                 END { if (seen) print total + 0 }' "$name.$format")
if [ -n "$chargesdf" ]; then
    charge=$chargesdf
fi

#
# construct mopac input file
#

# Number of ATOM/HETATM records in the PDB copy of the input.
natoms=$(( $(awk '$1=="ATOM" || $1=="HETATM"' $name.pdb | wc -l) ))

if [ $DEBUG == "1" ]; then
    printf "%30s %5d atoms"  $name $natoms
fi

# Three header lines of the MOPAC job: keywords (with the net charge when one
# is known), the job title and a free comment.
rm $name.dat >& /dev/null
{
    if [ $charge != "nil" ] ; then
        echo "AM1 MMOK XYZ CHARGE="$charge
    else
        echo "AM1 MMOK XYZ"
    fi
    echo $name
    echo "AM1 with geometry optimization"
} > $name.dat

# Let babel write a MOPAC file and keep only its last $natoms lines, which
# are appended to the header prepared above.
babel $name.pdb  $name.mop >& /dev/null
tail -$natoms $name.mop  > x.0
cat $name.dat x.0 > y.0
mv y.0 $name.dat
rm x.0

if [ $DEBUG == "1" ] ; then
    if [ $charge != "nil" ] ; then
        printf "%5d  charges "  $charge
    else
        printf " 0 charges "
    fi
fi

# Refuse empty inputs (Err0) and inputs with 110 atoms or more (Err1).
natoms_err=""
if [ $natoms -eq 0 ]; then
    natoms_err="Err0"
elif (( $natoms >= 110 )) ; then
    natoms_err="Err1"
fi

if [ -n "$natoms_err" ]; then
    if [ $DEBUG == "1" ] ; then
        printf "%6s\n"  " $natoms_err "
    else
        messages $natoms_err
    fi
    exit
fi

#
# Run MOPAC
#

run_mopac $name

# Third word of the line(s) of the MOPAC output that contain "MOPAC DONE";
# it stays empty when no such line exists.
mopac_ok=$(grep "MOPAC DONE" $name.OUT | awk '{print $3}')

# No completion line: report the failure in the style of the active mode
# and stop.
if [ -z $mopac_ok ] ; then
    if [ $DEBUG == "1" ] ; then
        printf "%11s\n"  " ErrMopac "
    else
        messages ErrMopac
    fi
    exit
fi
#
#  get atomic charges from the MOPAC output
#
#  Counting starts at the line whose first three words are
#  "NET ATOMIC CHARGES" (that line counts as 1).  From the 4th counted line
#  on, the third field of each of the next $natoms lines is written to
#  tmp.esp, one value per line.
#  NOTE(review): this header used to say "Mulliken" while the file header
#  speaks of ESP charges -- confirm which kind of charge this table holds.
#
awk -v nat=$natoms 'BEGIN{igo=0;j=0} {if($1=="NET" && $2=="ATOMIC" && $3=="CHARGES") {igo=1} ; if(igo==1) {i++}; if(i>=4) {j++; if(j<=nat){printf "%12.6f\n",$3}}}' $name.OUT   > tmp.esp
# Errors of this rm (e.g. files that do not exist) are shown, not hidden.
rm $name.arc* $name.log
#
#  get Optimized cartesian coordinates
#
#  The residue name is the first three characters of $name.  Counting starts
#  at the second line whose first word is CARTESIAN; from the 5th counted
#  line on, the next $natoms lines are rewritten as HETATM records (field 1
#  as serial, field 2 as atom name, fields 3-5 as x y z) into tmp.pdb.
#
lig3name=`echo $name | awk '{print substr($1,1,3)}'` 
awk -v nat=$natoms -v lig=$lig3name 'BEGIN{igo=0;j=0} {if($1=="CARTESIAN") {igo++} ; if(igo==2) {i++}; if(i>=5) {j++; if(j<=nat){printf "HETATM%5d  %-3s %3s     1    %8.3f%8.3f%8.3f\n", $1,$2,lig,$3,$4,$5}}}' $name.OUT   > tmp.pdb
#
#  save original cartesian coordinates 
#
#  Every ATOM/HETATM record of $name.pdb is re-emitted as HETATM with a fresh
#  running serial, the atom name (columns 13-16), the residue name (columns
#  18-20), residue number 1 and the coordinates (columns 31-54) -> tmp_1.pdb.
#
awk '{if($1=="ATOM" || $1=="HETATM") {i++; il=1; atom_name=substr($0,13,4); lig_name=substr($0,18,3);x=substr($0,31,8); y=substr($0,39,8); z=substr($0,47,8); printf "HETATM%5d %-4s %3s%6d    %8.3f%8.3f%8.3f\n", i,atom_name,lig_name,il,x,y,z}}' $name.pdb  > tmp_1.pdb

# Same removal as above, this time with the error messages suppressed.
rm $name.arc* $name.log >& /dev/null

# Join each optimized coordinate line with its charge (one blank in between)
# and feed the result to primadorac; its standard output goes to tmp.prd.out.
paste -d " " tmp.pdb tmp.esp > tmp.PDB;  $primadorac_exe < tmp.PDB > tmp.prd.out 

# Lower-case residue name taken from columns 18-20 of the first line of
# tmp.PDB.  awk reads the named file, so the piped "echo" is not consumed.
ligname=`echo | awk '{if(NR==1){print tolower(substr($0,18,3))}}' tmp.PDB`

# Rename the <ligname>.pdb/.tpg/.log files (expected to exist after the
# primadorac run above) to the <name>-p.* names used from here on.
mv "$ligname".pdb $name-p.pdb
mv "$ligname".tpg $name-p.tpg
mv "$ligname".log $name-p.log

# Instantiate the ORAC input template: NAME -> $name, "ligand" -> $ligname,
# and every line whose second word is GAFF becomes "<first word> $GAFF".
sed "s/NAME/$name/g" $Oin  | sed "s/ligand/$ligname/g"  | awk -v gaff=$GAFF '{if($2=="GAFF"){print $1,gaff} else {print}} ' > tmp.in
$orac_exe < tmp.in > tmp.out 

# Start the parameter file with a three-line header ...
echo "#" > $name-p.prm
echo "# ------ Done with primadorac1.0 --------" >>$name-p.prm
echo "#" >> $name-p.prm

# ... and append what ORAC printed between its "--> Begin" and "--> End"
# marker lines, leaving out every line that contains "-->".
awk 'BEGIN{igo=0}{if ($1=="-->" && $2=="Begin"){igo=1}; if(igo==1) print; if ($1=="-->" && $2=="End"){igo=0}}' tmp.out | grep -v "\-\->" >> $name-p.prm

# Print the parameter section of the ORAC output file $1: the lines between
# the "--> Begin" and "--> End" markers, without any line containing "-->".
extract_prm_section() {
    awk 'BEGIN{igo=0}{if ($1=="-->" && $2=="Begin"){igo=1}; if(igo==1) print; if ($1=="-->" && $2=="End"){igo=0}}' "$1" \
        | grep -v -e '-->'
}

# The parameter file is complete only when its last line is exactly "END".
# Quoted comparison: a last line made of several words is "not END" and no
# longer aborts the test and falls through to the success branch.
prm_ok=$(tail -n 1 "$name-p.prm")
if [ "$prm_ok" != "END" ] ; then
    if [ "$DEBUG" != "1" ] ; then messages Mopacfails; fi

    # Second attempt: redo the assignment with the original coordinates
    # (tmp_1.pdb), or with the MOPAC ones (tmp.pdb) when -f was given.
    if [[ $geomforce == "1" ]]; then
	paste -d " " tmp.pdb tmp.esp > tmp.PDB
    else
	paste -d " " tmp_1.pdb tmp.esp > tmp.PDB
    fi
    # NOTE(review): unlike the first pass, primadorac's standard output is not
    # redirected here and ends up in the script's own output -- confirm.
    "$primadorac_exe" < tmp.PDB

    ligname=$(awk 'NR==1 {print tolower(substr($0,18,3))}' tmp.PDB)
    mv "$ligname.pdb" "$name-p.pdb"
    mv "$ligname.tpg" "$name-p.tpg"
    mv "$ligname.log" "$name-p.log"

    sed "s/NAME/$name/g" "$Oin" | sed "s/ligand/$ligname/g" \
        | awk -v gaff="$GAFF" '{if($2=="GAFF"){print $1,gaff} else {print}} ' > tmp.in
    "$orac_exe" < tmp.in > tmp.out

    # Header plus ONE copy of the parameter section.
    {
	echo "#"
	echo "# ------ Done with primadorac1.0 --------"
	echo "#"
	extract_prm_section tmp.out
    } > "$name-p.prm"

    prm_ok=$(tail -n 1 "$name-p.prm")
    if [ "$prm_ok" == "END" ] ; then
	# The file written just above is already complete; appending the
	# section again would duplicate every parameter.
	if [ "$DEBUG" == "1" ]; then
	    printf "%10s"  " OriginOK "
	else
	    messages OriginOK
	fi
    else
	# Collect the lines ORAC flagged with *BND / *ANG / *PTO / *PIT.
	grep "*BND " tmp.out > tmp.bnd
	grep "*ANG " tmp.out > tmp.ang
	grep "*PTO " tmp.out > tmp.pto
	grep "*PIT " tmp.out > tmp.pit
	if [ "$DEBUG" != "1" ]; then messages Augmenting tmp.bnd tmp.ang tmp.pto tmp.pit; fi

	if [ -s tmp.bnd ] || [ -s tmp.ang ] || [ -s tmp.pto ] || [ -s tmp.pit ] ; then
	    cp "$GAFF" tmp.prm
#           augment gaff2016.prm with primadorac guess
	    # For each term type that has flagged lines: aug-<ext>.awk builds
	    # the guessed entries from those lines plus the ligand PDB, and
	    # upd.awk merges them into the working copy tmp.prm.
	    aug_dir=$orac_home/tools/primadorac/www
	    for pair in BND:bnd ANG:ang PTO:pto PIT:pit; do
		type=${pair%%:*}
		ext=${pair##*:}
		if [ -s "tmp.$ext" ] ; then
		    cat "tmp.$ext" "$name-p.pdb" | "$aug_dir/aug-$ext.awk" -v name="$name" > "$name.$ext"
		    n_new=$(wc -l "$name.$ext" | awk '{print $1 }')
		    "$aug_dir/upd.awk" -v type="$type" -v n="$n_new" "$name.$ext" tmp.prm > tmp1.prm
		    mv tmp1.prm  tmp.prm
		fi
	    done

	    # Third ORAC run, this time reading the augmented tmp.prm.
	    sed "s/NAME/$name/g" "$Oin" | sed "s/ligand/$ligname/g" | sed "s/GAFF/tmp\.prm/g" > tmp.in
	    "$orac_exe" < tmp.in > tmp.out

	    # Parameter file: header, warning, the guessed entries as comments
	    # (first line of each list skipped), then the parameter section.
	    {
		echo "#"
		echo "# ------ Done with primadorac1.0 --------"
		echo "#"
		echo "# WARNING!!!: the following parameters were not found in official amber.orf gaff2016.prm file"
		echo "# WARNING!!!: and are *GUESSED*  by primadoroc according  to simple rules."
		if [ -s "$name.bnd" ] ; then awk '{if(NR>1) print "# Bonds:",$0}' "$name.bnd" ; fi
		if [ -s "$name.ang" ] ; then awk '{if(NR>1) print "# Bendings:",$0}' "$name.ang" ; fi
		if [ -s "$name.pto" ] ; then awk '{if(NR>1) print "# Proper Torsions:",$0}' "$name.pto" ; fi
		if [ -s "$name.pit" ] ; then awk '{if(NR>1) print "# Improper Torsions:",$0}' "$name.pit" ; fi
		extract_prm_section tmp.out
	    } > "$name-p.prm"

	    if [ "$DEBUG" == "1" ] ; then
		printf "%12s"  " AugOrig "
	    else
		messages AugOrig
	    fi
	fi
    fi
else
    # First pass (MOPAC geometry) already gave a complete parameter file.
    if [ "$DEBUG" == "1" ] ; then
	printf "%10s"  " MopacOK "
    else
	messages MopacOK
    fi
fi
# final test on confidence level
# (plain rmsmean output in DEBUG mode, html-wrapped otherwise)
case $DEBUG in
    1) rmsmean ;;
    *) messages domean ;;
esac

if [[ $WWW == 1 ]]; then
    # compute IR spectrum
    # The frequency file is cut into pieces of natoms+2 lines each; split
    # names the pieces x000, x001, ...
    nsplit=$(( natoms + 2 ))
    split -a 3 -l $nsplit  -d $name-p.frq

    # Both IR helper files must exist before going on.
    [ -f  $ir ] || { echo " ir.awk not found in " $ir; exit; }
    [ -f  $irg ] || { echo " ir.gplt not found in " $irg; exit; }

    # Run the IR script on every piece together with the charges, removing
    # each piece once used; the collected output is tmp.ir.
    for piece in x[0-9]?? ; do
        $ir -v n=$natoms tmp.esp $piece
        rm $piece
    done > tmp.ir

    # Gnuplot script with NAME replaced by <name>-IR-p.
    sed "s/NAME/$name\-IR\-p/g" $irg > tmp.gplt
    gnuplot tmp.gplt

    # The archive goes into ./molecules when that directory exists,
    # otherwise next to the input with an S_ prefix.
    if [ -d "molecules" ]; then
        archive=molecules/$name.tar.gz
    else
        archive=S_$name.tar.gz
    fi
    tar zcvf $archive $name*-p.* README.txt >& /dev/null

    # Remove every intermediate file AND all structure files lying in the
    # working directory.
    rm tmp.* $name.* *.mop *.pdb  *-p.*  *.dat *.out *.log *.OUT *.pdb *.tpg  ref.min README.txt >& /dev/null
    rm *.sdf *.xyz *.mol2 *.mdl >& /dev/null
fi
exit


