#!/bin/bash
#
#
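# This script is launched by fixpdb.php and fixes the PDB for the current gap $1.
# syntax:
#         join_gap.bash gapn pdbcode water lig
#         where gapn is an integer labeling the n-th gap in the gaps file
#         (gapn = 0 selects the branch for a PDB without gaps), pdbcode is the
#         PDB code ($2), water is the solvation setting ($3; 0 means no
#         solvation) and lig is the ligand setting ($4; NONE means no ligand).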
#
# ---------------------------------------------------------------------------
# Environment, cleanup and command-line arguments
# ---------------------------------------------------------------------------
# NOTE(review): presumably set so that the numeric tools used below print "."
# as decimal separator -- confirm.
export LC_NUMERIC="en_US.UTF-8"
OLD_PATH=$PATH
# ORAC root directory: $PWD with every "/tools/pdbrestore/www" removed.
orac_home=$(printf '%s\n' "$PWD" | sed "s?/tools/pdbrestore/www??g")
bin=$orac_home/bin
prima=$orac_home/tools/primadorac
prima_src=$orac_home/tools/primadorac/src
scripts=$orac_home/tools/scripts
# The trailing ":" is preserved: an empty PATH entry stands for the current
# directory, and some helpers below are invoked without a "./" prefix.
export PATH=$bin:$prima:$scripts:$prima_src:$OLD_PATH:
# Remove leftovers of a previous run (-f: a missing file is not an error).
rm -f orac.out 0.in x.pdb tmp.pdb rec+gap.pdb tmp.in tmp.out test_file

pdbcode=$2
water=$3
lig=$4
# Element symbols searched for among the HETATM records. Every symbol must
# appear only once: the list is looped over and the matches are appended to
# metals.pdb / metals.res, so a repeated symbol would duplicate its atoms.
metals="BE CU NI PT ZN CO PD AG CR FE MG V MN HG CD YB SN PB EU SR SM BA RA AL TL Y LA CE PR ND GD TB DY ER TM LU HF ZR U PU TH"
cofactors="HEM HEA HEC HED HEF HEO BCL BPH"
# Number of ligand residues: counts how many times columns 23-26 of lig.pdb
# change from one line to the next. nlig stays unset when lig.pdb is
# missing or empty.
if [ -s lig.pdb ]; then
    nlig=$(awk '{nres=substr($0,23,4); if(nres!=nres_old){dr++}; nres_old=nres} END{print dr}' lig.pdb)
fi
# function solvate (called if water != 0 and gaps=0)
#
# Builds md.in from a template, runs ORAC on it and, when md_1.pdb is
# produced, extracts ${pdbcode}_fixed+wat.pdb from it.
#
# Arguments:
#   $1 - template input file; "#toinsert" markers are stripped and lines
#        whose last field is "#toremove" are dropped
#   $2 - string substituted for "watermodel" in the template
# Globals read:    orac_home, pdbcode
# Globals written: water (set to 0 when more than 32 waters are computed)
# Files: reads rec.pdb; writes END, tmp.pdb, md.in, md.out, water.pdb and
#        ${pdbcode}_fixed+wat.pdb; removes every *.tmp file.
function solvate {
    local input_file=$1
    local watermodel=$2
    local boxsize linked ngrid nwat n

    echo "END" > END
    cat rec.pdb END | grep -v REM > tmp.pdb
    # orient is expected to leave setup.tmp, potential.tmp and solvent.tmp
    # behind; the values below are read from those files.
    "$orac_home/tools/orient/orient"
    boxsize=$(grep CRYSTAL setup.tmp | awk '{print $2}')
    linked=$(grep LINKED_CELL potential.tmp | awk '{print $2}')
    ngrid=$(grep EWALD potential.tmp | awk '{print $4}')
    # Water count: the GENERATE value reduced by int(8%) of itself.
    nwat=$(grep GENERATE solvent.tmp | awk '{print $2 - int(0.08*$2)}')

    # NOTE(review): the run is not aborted when the limit is exceeded, only
    # reported; the threshold of 32 looks small -- confirm both.
    if [ "$nwat" -gt 32 ]; then
       water=0
       # report the computed number of waters (was an undefined variable)
       echo "<br> <span style="color:#FF0000"> number of water: $nwat  </span>"
       echo "<br> <span style="color:#FF0000"> Solvated MD exceeds max allowed mol </span>"
    fi

    # Fill the placeholders of the template.
    sed "s/#toinsert//g" "$input_file" | awk '{if ($NF!="#toremove") print}' > md.in
    sed -i \
        -e "s/BOXSIZE/$boxsize/g" \
        -e "s/NLINK/$linked/g" \
        -e "s/NWAT/$nwat/g" \
        -e "s/NGRID/$ngrid/g" \
        -e "s/watermodel/$watermodel/g" \
        -e "s/pdbcode/${pdbcode}_fixed.pdb/g" md.in
    cp "$orac_home/pdb/water.pdb" .
    rm *.tmp              # purge orient files
    # NOTE(review): this message is printed before ORAC is actually run.
    echo "<br> ===  NPT simulation of solvated $pdbcode done."
    "$orac_home/src/GNU-FFTW-OMP/orac" < md.in > md.out 2>&1

    if [ -s md_1.pdb ]; then
        # Field 2 of the next-to-last line plus 2 gives the number of
        # trailing lines copied from md_1.pdb.
        n=$(tail -n 2 md_1.pdb | head -n 1 | awk '{print $2+2}')
        tail -n "$n" md_1.pdb > "${pdbcode}_fixed+wat.pdb"
        "$orac_home/tools/scripts/ora-gro_pdb.awk" "${pdbcode}_fixed+wat.pdb" > tmp.pdb
        mv tmp.pdb "${pdbcode}_fixed+wat.pdb"
        echo "<br> <span style="color:#00FFAA";> Solvation successuful. ${pdbcode}_fixed+wat.pdb generated </span>"
    else
        echo "<br> Solvation failed.."
    fi
}

# THE FOLLOWING  IS PROCESSED WHEN GAPS == 0  

# Guard: nothing can be done without a non-empty chain.pdb. Report the
# problem on the page, record it in failures/<pdbcode>.pdb and stop.
[[ -s chain.pdb ]] || {
    echo " <p style="color:#FF0000";>  chain.pdb is empty.. check chain selection </p>"
    echo " chain.pdb is empty.. check chain selection" > failures/$pdbcode.pdb
    exit
}


# Shift the residue numbers of a PDB file in place.
#   $1 - number of the first residue of the selected chain
#   $2 - PDB file to rewrite
# Every ATOM record gets (field 5 + $1 - 1) written into columns 23-26;
# records other than ATOM are not copied. tmp.pdb is used as scratch file.
function renumber_residues {
    local start=$1
    local pdbfile=$2
    awk -v sr="$start" '{if($1=="ATOM") {printf "%22s",substr($0,1,22); printf "%4d",$5+sr-1; printf "%28s\n",substr($0,27,28)}}' "$pdbfile" > tmp.pdb
    mv tmp.pdb "$pdbfile"
}

# ORAC input template with the ORAC_HOME placeholder resolved.
sed "s?ORAC_HOME?$orac_home?g" template_0.in > tmp.in

# Gap number 0: the PDB is processed as a whole, then the script exits.
if [[ "$1" == 0 ]]; then
    # Residue list: lower-cased column 1 of SEQ.full, " !" and column 2;
    # lines containing "xxx" are dropped.
    awk '{print tolower($1), " !", $2}' SEQ.full | grep -v "xxx" > seq
    # fix histidines (his/hie -> hse, hip -> hsp, hid -> hsd) and
    # fix cysteines (momentarily) (cys -> cysh)
    sed -i \
        -e "s/his/hse/g" \
        -e "s/hip/hsp/g" \
        -e "s/hid/hsd/g" \
        -e "s/hie/hse/g" \
        -e "s/cys/cysh/g" seq
    nresgap=$(wc -l < seq)
    # fix termini: "-h" is appended to the first residue, "-o" to the last
    awk -v n="$nresgap" '{if(NR==1) {print $1"-h", " ! ",$3} else if(NR==n)  {print $1"-o", " ! ",$3} else {print}}' seq > tmp
    mv tmp seq

    # Splice the residue list into the template right after the line that
    # starts with "JOIN SOLUTE"; the template is copied from its "sequence"
    # line onwards.
    cat seq tmp.in | awk '{i++;res[i]=$0; if($2=="sequence") {ok=1;n=i-1}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE") {for(j=1; j<=n; j++){print res[j]} }}' > 0.in
    sed -i "s/rec+gap/rec/g" 0.in
    ./fix_cys.bash 0.in rec.pdb

    # get pdb coordinates (metals.pdb) and residue type (metals.res) of the
    # metals: HETATM records of the selected chain, or with a blank chain
    # column, whose last field is one of $metals. Reading stops at ENDMDL.
    rm -f metals.pdb metals.res
    chain=$(head -n 1 chain.pdb | awk '{print substr($0,22,1)}')
    for mtl in $metals; do
        awk -v c="$chain" -v mtl="$mtl" '{if($1=="ENDMDL") {exit}; if(( substr($0,22,1)==c || substr($0,22,1)==" " ) && $1~"^HETATM" && $NF==mtl)  print }' "$pdbcode.pdb" >> metals.pdb
    done
    for mtl in $metals; do
        awk -v c="$chain" -v mtl="$mtl" '{if($1=="ENDMDL") {exit}; if(( substr($0,22,1)==c || substr($0,22,1)==" " ) && $1~"^HETATM" && $NF==mtl)  print  $NF}' "$pdbcode.pdb" | awk '{print tolower($1)}' >> metals.res
    done

    if [ -s metals.pdb ]; then
        echo "fixing metals .. "
        cat rec.pdb metals.pdb | grep -v "END" > tmp.pdb
        mv tmp.pdb rec.pdb
        # insert residue metal types BEFORE 'endsolute' in template
        # (uses the "e" command of GNU sed)
        sed $'/endsolute/{e cat metals\.res\n}' 0.in > tmp.in
        mv tmp.in 0.in
    fi

    # fix the ligand as last residue to avoid problems with ADD_TPG and
    # residue numbering
    if [[ -s lig-p1.pdb && -s prima/prima_ok ]]; then
        echo "<br>fixing ligand"
        cat rec.pdb lig-p?.pdb | grep -v "END" > tmp.pdb
        mv tmp.pdb rec.pdb
        # $nlig is left unquoted on purpose: when it is unset, "seq 1"
        # still yields one iteration, as before.
        for i in $(seq 1 $nlig); do
            sed -i '/endsolute/i lig ' 0.in
        done
        cp 0.in tmp.in
        awk '{if ($1=="&PARAMETERS") {print $0; print "   READ_TPG_ASCII lig-p.tpg"; print "   READ_PRM_ASCII lig-p.prm"} else {print}}' tmp.in > 0.in
    fi
    sed -i "s/THREADS 1/THREADS 6/g" 0.in

    # now transform all CYS-binding metals from cysh to cysm
    if [ -s metals.pdb ]; then
        fix_cys_zn.bash 0.in rec.pdb
    fi

    # Switch the template to the full run before launching ORAC.
    sed -i \
        -e "s/TIME 1.0/TIME 150.0/g" \
        -e "s/1.0 OPEN 2.pdb/15.0 OPEN 2.pdb/g" \
        -e "s/THREADS 1/THREADS 6/g" 0.in
    "$orac_home/src/GNU-FFTW-OMP/orac" < 0.in > tmp.out

    # The run is accepted when its output has no NaN and 2.pdb is not empty.
    grep NaN tmp.out > test_file
    if [[ ! -s test_file && -s 2.pdb ]]; then
        echo "<br> <b>" 
        tail -35 tmp.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
        echo "</b>"
        # Field 2 of the next-to-last line plus 2 gives the number of
        # trailing lines of 2.pdb that are copied.
        n=$(tail -n 2 2.pdb | head -n 1 | awk '{print $2+2}')
        tail -n "$n" 2.pdb > "${pdbcode}_fixed.pdb"

        # Restore the original numbering when the chain does not start at
        # residue 1. The number is compared numerically (a zero-padded
        # string never equals "1") and the file that was just written is
        # the one renumbered.
        sr=$(head -n 1 chain.pdb | awk '{printf "%d\n", substr($0,23,4)}')
        if [ "$sr" -ne 1 ]; then
            renumber_residues "$sr" "${pdbcode}_fixed.pdb"
        fi

        # An empty $water skips solvation, exactly like water=0.
        if [[ -n "$water" && "$water" != 0 ]]; then
            size=$(wc -l chain.pdb | awk '{print $1*2}')
            if [ "$size" -gt 9000 ]; then
                echo " selected chain has more than 9K atoms; no solvated protein will be produced"
                water=0
            else
                solvate 0.in $water
            fi
        fi

        "$orac_home/tools/scripts/ora-gro_pdb.awk" "${pdbcode}_fixed.pdb" > tmp.pdb
        mv tmp.pdb "${pdbcode}_fixed.pdb"
        # Sequence actually used: the lines of 0.in between "JOIN SOLUTE"
        # and the line whose third field is "endsolute".
        awk '{if ($3=="endsolute") {ok=0}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE")  {ok=1}}' 0.in > input_sequence

        # An empty $lig is handled like NONE.
        if [[ -n "$lig" && "$lig" != "NONE" ]]; then
            cp prima/file.itp lig-g.itp
            tar zcf "uploads/${pdbcode}_fixed.tar.gz" "${pdbcode}"_fixed*.pdb input_sequence lig-[pg].*
        else
            tar zcf "uploads/${pdbcode}_fixed.tar.gz" "${pdbcode}"_fixed*.pdb input_sequence
        fi
        echo "</pre>"
        echo "<h2> <span style="color:#00AA00";> PDB fixed!!</h2> </span> "
        echo "<h3> Get it <a href=uploads/${pdbcode}_fixed.tar.gz> here! </a> " 
        echo " &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp go back to <a href=pdbrestore.html>PDBrestore</a> </h3>" 
    else
        echo " <br>  <span style="color:#FF0000";> Something went awry. So sorry.... </span>"
        echo " Something went awry. No optimized pdb was generated" > failures/$pdbcode.pdb
        # Suggest gap fixing only when the gaps file really lists gaps
        # (one line per gap).
        nfndgap=$(wc -l gaps | awk '{print $1}')
        if [[ -n "$nfndgap" && "$nfndgap" -gt 0 ]]; then
            echo "   $nfndgap gaps were found, while FIX GAPS is NO;"
            echo "   Try to rerun by setting FIX GAPS to YES <br>"
        fi
        echo " &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp &nbsp go back to <a href=pdbrestore.html>PDBrestore</a> and try again.."  
    fi
    exit
fi
    
# THE FOLLOWING  IS PROCESSED WHEN GAPS != 0
#
# Rebuilds gap number $1:
#   1. the two residue numbers in columns 2 and 3 of line $1 of "gaps" are
#      read and those residues are left out of the sequence;
#   2. for each of the four gap files (GG, G90G, G180G, G270G) ORAC is run
#      on the gap alone, the result is merged with rec.pdb and ORAC is run
#      on the joined structure;
#   3. the first orientation that passes the checks is saved and the script
#      exits; a failed orientation leaves its message in the file "error".

# prepare 0.in from template with current gap

gap_entry=$(head -n "$1" gaps | tail -n 1)
first=$(awk '{print $2}' <<< "$gap_entry")
last=$(awk '{print $3}' <<< "$gap_entry")
echo " " $first $last
awk '{print tolower($1), " !", $2}' SEQ.full | grep -v "xxx" | awk -v n1="$first" -v n2="$last" '{if($3!=n1 && $3!=n2) print}' > seq.with-gap
nresgap=$(wc -l < seq.with-gap)
# fix histidines (his/hie -> hse, hid -> hsd, hip -> hsp) and
# fix cysteines (momentarily) (cys -> cysh)
sed -i \
    -e "s/his/hse/g" \
    -e "s/hid/hsd/g" \
    -e "s/hie/hse/g" \
    -e "s/hip/hsp/g" \
    -e "s/cys/cysh/g" seq.with-gap
# fix termini: "-h" is appended to the first residue, "-o" to the last
awk -v n="$nresgap" '{if(NR==1) {print $1"-h", " ! ",$3} else if(NR==n)  {print $1"-o", " ! ",$3} else {print}}' seq.with-gap > tmp
mv tmp seq.with-gap

sed "s?ORAC_HOME?$orac_home?g" template_0.in > tmp.in
cat seq.with-gap tmp.in | awk '{i++;res[i]=$0; if($2=="sequence") {ok=1;n=i-1}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE") {for(j=1; j<=n; j++){print res[j]} }}' > 0.in

# Value handed to insert_seq.awk / insert_gap.awk as "ng".
ng=$(( last + 1 ))

# launch orac with SMD on gap

jr=-90                                              # rotation label: 0, 90, 180, 270
J=$(printf '%s\n' "$1" | awk '{printf "%02d",$1}')  # two-digit gap number
for i in "GG$J.pdb" "G90G$J.pdb" "G180G$J.pdb" "G270G$J.pdb"; do
    # tmp.in is rebuilt on every pass because it is edited further down.
    if [ -s fort.101 ]; then
        # The name found in column 1 of fort.101 is replaced by "ala".
        unk=$(awk '{print $1}' fort.101)
        cat "subseq.$J" 0.in | sed "s/$unk/ala/g" | ./insert_seq.awk -v ng="$ng" > tmp.in
    else
        cat "subseq.$J" 0.in | insert_seq.awk -v ng="$ng" > tmp.in
    fi
    jr=$(( jr + 90 ))
    sed "s/GG$J.pdb/$i/g" "GAP$J.in" > "GAP${J}_$jr.in"
    sed -i "s/energy_then_die/ \! energy_then_die/g" "GAP${J}_$jr.in"
    rm -f 1.pdb
    "$orac_home/src/GNU-FFTW-OMP/orac" < "GAP${J}_$jr.in" > orac.out

    # No usable 1.pdb: the gap run failed, go on with the next orientation.
    n=$(tail -n 2 1.pdb | head -n 1 | awk '{print $2+2}')
    if [ -z "$n" ]; then
        echo "              something went wrong with gap $J.." 
        continue
    fi
    tail -n "$n" 1.pdb | grep -v REMARK | grep -v END > x.pdb

    # prepare rec+gap.pdb: rec.pdb without the two residues named in the
    # gaps entry, merged with the gap coordinates
    awk -v n="$first" '{if(int(substr($0,23,4))!=n) print }' rec.pdb | awk -v n="$last" '{if(int(substr($0,23,4))!=n) print }' > tmp.pdb
    cat x.pdb tmp.pdb | insert_gap.awk -v ng="$ng" > rec+gap.pdb

    # launch orac on joined sequence
    ./fix_cys.bash tmp.in rec+gap.pdb
    "$orac_home/src/GNU-FFTW-OMP/orac" < tmp.in > tmp.out
    grep NaN tmp.out > test_file
    echo "        rotation " $jr, " file " $i   
    if [ ! -s test_file ]; then
        # No NaN: first test passed. Relaunch orac full minimizations
        echo "        No NaN for t=1.0. Launching full minimization"
        sed -i \
            -e "s/THREADS 1/THREADS 6/g" \
            -e "s/TIME 1.0/TIME 250.0/g" \
            -e "s/1.0 OPEN 2.pdb/15.0 OPEN 2.pdb/g" tmp.in
        "$orac_home/src/GNU-FFTW-OMP/orac" < tmp.in > tmp.out
    fi

    # check that full minimization run was correct
    grep NaN tmp.out > test_file
    if [ -s test_file ]; then
        echo "        orientation " $jr " failed " 
        echo "        orientation " $jr " failed for gap " $1  > error 
        continue
    fi

    # check for ***** and for a positive value in field 6 of the Tstep lines
    energy=$(tail -35 tmp.out | grep Tstep | awk '{if($6>0 || substr($6,1,1)=="*") {print "1000"}else {print "ok"}}')
    # NOTE(review): $energy is left unquoted to keep the existing behaviour:
    # when zero or several Tstep lines are found the test below errors out
    # and the orientation is accepted -- confirm whether that is intended.
    if [ $energy == "1000" ]; then
        echo "        orientation " $jr " failed full minimization"
        echo "        orientation " $jr " failed full minimization" > error
        tail -35 tmp.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
    else
        # Success: keep the input, output and structures of this gap.
        n=$(tail -n 2 2.pdb | head -n 1 | awk '{print $2+2}')
        cp tmp.in "tmp$J.in"
        cp tmp.out "tmp$J.out"
        cp rec+gap.pdb "rec+gap$J.pdb"
        tail -35 tmp.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
        tail -n "$n" 2.pdb > "${pdbcode}_gap${J}_last.pdb"
        # report the gap number ($1), not the PDB code ($2)
        echo "        Exiting gap "$1 " rotation " $jr
        cp x.pdb "gap_ok$J.pdb"
        rm -f error
        exit
    fi
done
