#!/bin/bash
# ---------------------------------------------------------------------------
# Environment setup.
#
# Positional parameters:
#   $1  code    basename of the working files ($code.pdb, $code.OUT, $code.html)
#   $2  lig     residue name of the ligand to extract; "NONE" disables it
#   $3  chain   chain identifier to process
#   $4  dogaps  "yes" requests construction of gap templates
# ---------------------------------------------------------------------------

# Exported so that every child process formats numbers with the en_US locale
# (presumably to guarantee '.' as decimal separator -- confirm).
export LC_NUMERIC="en_US.UTF-8"

code=$1
lig=$2
nlig=0        # number of ligand copies; updated by ligands()
chain=$3
dogaps=$4

OLD_PATH=$PATH

# ORAC root = current directory with every "/tools/pdbrestore/www" removed.
# Parameter expansion is used instead of `echo $PWD | sed` so that blanks or
# glob characters in the directory name are preserved verbatim.
orac_home=${PWD//\/tools\/pdbrestore\/www/}

bin=$orac_home/bin
prima=$orac_home/tools/primadorac
prima_src=$orac_home/tools/primadorac/src
www=$orac_home/tools/primadorac/www
scripts=$orac_home/tools/scripts

# Instantiate the steering template with the actual installation path.
sed "s?ORAC_HOME?$orac_home?g" steer_template.in  > steer_tmp.in

# The trailing ':' is kept on purpose: an empty PATH entry makes the shell
# search the current directory as well.
export PATH="$bin:$prima:$scripts:$prima_src:$OLD_PATH:"

# NOTE(review): hard-coded home directory; presumably required by a tool that
# is started from a service account -- confirm before changing.
HOME=/home/procacci
# ---------------------------------------------------------------------------
# Workspace cleanup.
#
# The wildcard removal below would also delete the files belonging to the
# current entry ($code.pdb, $code.OUT, $code.html), so they are parked under
# neutral names first and moved back afterwards.  Only files that actually
# exist are moved; a missing optional file no longer produces an mv error.
# ---------------------------------------------------------------------------
keep_names=("$code.pdb" "$code.OUT" "$code.html")
keep_stash=(codepdbsaved codeoutsaved codehtmlsaved)

for _slot in 0 1 2; do
    if [ -e "${keep_names[_slot]}" ] || [ -L "${keep_names[_slot]}" ]; then
        mv -- "${keep_names[_slot]}" "${keep_stash[_slot]}"
    fi
done

# Leftovers of previous runs; errors for non-matching patterns are discarded.
rm -fr error *.pdb  *.out  tmp.disulf prima GAP*.out GAP*.in fort.*  [0-9]???.html  *autopsf* x.? rec.psf WRK* *.OUT seq.* SEQ* G*G tmp.* metals.* >& /dev/null

# A stash file is restored whenever it is present (this also covers a stash
# left behind by an interrupted earlier run, as the original code did).
for _slot in 0 1 2; do
    if [ -e "${keep_stash[_slot]}" ] || [ -L "${keep_stash[_slot]}" ]; then
        mv -- "${keep_stash[_slot]}" "${keep_names[_slot]}"
    fi
done


#######################################
# ligands RESNAME
#
# Extract every copy of residue RESNAME from $code.pdb and run primadorac.bash
# on each copy to obtain topology/parameter files.
#
# Globals read:    code, chain, lig (lig is only used in the success message)
# Globals written: nlig  - number of distinct residue numbers found
#                  i, j, k, nat_lig, charge, c, NL, QQ, RMS
#                  (deliberately NOT declared local: code outside this
#                  function may read them)
# Arguments:       $1 - residue name, compared with columns 18-20 of each line
# Stdout:          HTML-flavoured progress and error messages
# Files:           lig.pdb, x??.pdb, prima/ (scratch directory, recreated for
#                  every copy), lig-p.tpg, lig-p.prm, lig-p<N>.pdb
# Returns:         status of the last command; 1 if prima/ cannot be entered
#######################################
function ligands {
    # Keep the records of residue $1 whose chain id (column 22) is the
    # requested chain or blank; reading stops at the first ENDMDL record.
    awk -v c="$chain" -v l="$1" '{if($1=="ENDMDL") {exit}; if(( substr($0,22,1)==c || substr($0,22,1)==" " )&& substr($0,18,3)==l) print }' "$code.pdb" > lig.pdb
    if [ ! -s lig.pdb ]; then
        # The inner quotes are escaped so that the style attribute is emitted
        # as style="..." (unescaped quotes terminated the shell string).
        echo "<span style=\"color:#FF0000\">no ligand <b>$1</b> found in chain <b>$chain</b>; program continues.. </span>"
        return
    fi

    # Number of copies = number of changes of the residue-number field
    # (columns 23-26) while scanning lig.pdb.
    nlig=$(awk '{nres=substr($0,23,4); if(nres!=nres_old){dr++}; nres_old=nres} END{print dr}' lig.pdb)

    # One x??.pdb file per copy.
    rm x??.pdb >& /dev/null
    if [ "$nlig" != "1" ]; then
        # The awk prints the record count of a copy each time the next copy
        # starts; tail keeps the last such count.  lig.pdb is then cut into
        # chunks of that size, i.e. all copies are assumed to have the same
        # number of records.
        nat_lig=$(awk '{nres=substr($0,23,4); if(nres!=nres_old) {print i; nres_old=nres; i=0}; i++}' lig.pdb | tail -1)
        split -l "$nat_lig" --additional-suffix=".pdb" lig.pdb
    else
        cp lig.pdb xaa.pdb
    fi

    j=0
    for i in x??.pdb; do
        [ -e "$i" ] || continue          # glob did not match anything
        ((j=j+1))

        # Fresh scratch directory for this copy.
        rm -fr prima >& /dev/null
        mkdir -p prima
        if ! cd prima; then
            # Without this guard the commands below would run in the parent
            # directory and the final "cd ../" would leave the work area.
            echo "ligands: cannot enter scratch directory 'prima'" >&2
            return 1
        fi

        # Split the copy into organic part (lig.pdb) and metal ions: a record
        # is treated as an ion when its last field is MG or FE.
        awk -v i1="MG" -v i2="FE" '{if($NF!=i1 && $NF!=i2) print}' "../$i" > lig.pdb
        awk -v i1="MG" -v i2="FE" '{if($NF==i1 || $NF==i2) print $NF}' "../$i" > ion-type

        if [ -s ion-type ]; then  # magnesium or iron found in ligand/cofactor
            # Signed charge = 4th field of the primadorac output minus 2.
            charge=$(primadorac.bash  -gspn lig.pdb  | awk '{c=$4-2; if(c>=0) {print "+"c} else {print c}}')

            # Remove from lig-p.pdb every atom that lig-p.tpg lists with type
            # "hn".
            # NOTE(review): grep matches the label as a substring/regex, so a
            # label such as "h1" would also drop "h10" -- confirm the label
            # format guarantees uniqueness.
            awk '{if(NF==3 && $2=="hn") print $1}' lig-p.tpg > h_atoms
            while IFS= read -r k || [ -n "$k" ]; do
                [ -n "$k" ] || continue
                grep -v -- "$k" lig-p.pdb > tmp.pdb ; mv tmp.pdb lig-p.pdb
            done < h_atoms

            mv lig-p.pdb "lig${charge}.pdb" # updated BLC/HEM
            primadorac.bash -ghsn "lig${charge}.pdb"  >& prima.out
            if [ -f "lig${charge}-min-p.pdb" ]; then
                # symmetrize charge on metal coordinating nitrogens:
                # every "nb" atom gets (sum of nb charges)/4
                c=$(awk '{if(NF==3 && $2 == "nb") {c+=$3 } }END{print c/4}' "lig${charge}-p.tpg")
                awk -v c="$c" '{if(NF==3 && $2 == "nb") {print $1,$2,c }else{print}}' "lig${charge}-p.tpg" > ../lig-p.tpg
                cp "lig${charge}-p.prm"  ../lig-p.prm
                cp "lig${charge}-p.pdb"  "../lig-p$j.pdb"
                # Replace every isolated " 1 " by the running copy index.
                sed -i "s/ 1 / $j /g"  "../lig-p$j.pdb"
                NL=$(awk '{print $2}' prima.out)
                QQ=$(awk '{print $4}' prima.out)
                RMS=$(awk '{print $NF}' prima.out)
                echo "PrimaDORAC for $lig OK -> <b> Natoms = $NL ; charge = $QQ  RMS =$RMS </b>"
                echo "PrimaDORAC OK" > prima_ok
            else
                echo "primadorac Failed. Output follows"
                echo " <p style=\"color:#FF0000;\"> $(head -1 prima.out); </p>"
                echo " <p style=\"color:#FF0000;\"> no ligand will be inserted in the fixed PDB; </p>"
            fi
        else
            primadorac.bash  -gspn lig.pdb >& prima.out
            if [ -f lig-min-p.pdb ]; then
                cp lig-p.tpg ../lig-p.tpg
                cp lig-p.prm ../lig-p.prm
                cp lig-p.pdb "../lig-p$j.pdb"
                NL=$(awk '{print $2}' prima.out)
                QQ=$(awk '{print $4}' prima.out)
                RMS=$(awk '{print $NF}' prima.out)
                echo "PrimaDORAC for $lig OK -> <b> Natoms = $NL ; charge = $QQ  RMS =$RMS </b>"
                echo "PrimaDORAC OK" > prima_ok
            else
                echo "primadorac Failed. Output follows"
                # Print the first line as-is (no word splitting / globbing
                # of the program output).
                head -1 prima.out
                echo " no ligand will be inserted in the fixed PDB"
            fi
        fi
        cd ../
    done

    # Give the residue a fixed name in the topology file.
    awk '{if($1=="RESIDUE") {print "RESIDUE lig"} else {print}}' lig-p.tpg  > tmp.tpg
    mv tmp.tpg lig-p.tpg
}

# isolate selected chain (protein only)
# The files SEQ, gaps and rec.pdb used below are presumably written by
# sequence.bash -- confirm against that script.
./sequence.bash -l -c "$chain" "$code"

if [ ! -f SEQ ]; then
    echo "DNA chain or error in sequence alignment" > "failures/$code.pdb"
    echo "DNA chain or error in sequence alignment" > error
    # Exit status is left as in the original (status of the last command);
    # the failure is reported through the two files above.
    exit
fi

# reset gaps if sequence is aligned with no gaps (due to e.g. insertions)
if [ ! -s gaps ]; then
    dogaps="NONE"
fi

# isolate selected ligand calling the function ligands
# An empty ligand name is skipped explicitly; with the former unquoted test
# an empty value made `[` fail with a syntax error, which had the same effect.
if [[ -n "$lig" && "$lig" != "NONE" ]]; then
    ligands "$lig"
fi

# seq+gap: lower-case residue name followed by " ! <field2> <field3>" of SEQ.
awk '{print tolower($1), " !",$2,$3}' SEQ  > seq+gap

# fix histidines: his/hie -> hse, hid -> hsd, hip -> hsp (one pass, same
# order of substitutions as before)
sed -i -e "s/his/hse/g" -e "s/hid/hsd/g" -e "s/hie/hse/g" -e "s/hip/hsp/g" seq+gap

# NOTE(review): this counts the lines of a file named "seq", not "seq+gap" or
# "SEQ" -- confirm that this is the intended file.
nresgap=$(wc -l < seq)

# Number of lines printed by check_disulphide.awk for rec.pdb.
disl=$(check_disulphide.awk rec.pdb | wc -l)

# disulfur bridge are checked in join_gaps and join_all_gaps
sed -i "s/cys/cysh/g" seq+gap # fix cysteines
    
# generates the gaps
#
# For every gap listed in the file "gaps" this section
#   1. writes the missing residues to gap.NN (one lower-case name per line),
#   2. builds a PDB template for them with alltrans, anchored on the backbone
#      atoms of the residue where the gap starts,
#   3. writes an ORAC input GAPNN.in from steer_tmp.in and runs orac on it,
#   4. patches atom/residue indices taken from 1.pdb into GAPNN.in.
# Files fort.90/.180/.270/.101 and 1.pdb are presumably produced by alltrans
# and orac respectively -- confirm against those programs.
if [ "$dogaps" == "yes" ] && [ -s gaps ]; then
    echo "building gap PDB-templates...."

    # "gaps" lines look like "GAP first last"; SEQ lines carry the residue
    # name in $1 and its number in $2.  For every SEQ residue strictly inside
    # a gap "-NAME" is appended to the current output line; the line is
    # terminated when the residue numbered "last" is reached.
    awk '{if($1=="GAP") { igap++; g1[igap]=$2; g2[igap]=$3} else {for(j=1; j<=igap; j++) { if($2>g1[j] && $2 <g2[j]) printf "-%s", $1; if($2==g2[j]) printf "\n"}}}' gaps SEQ > gaps.res

    j=0
    rm -fr gap.* >&/dev/null
    while IFS= read -r i || [[ -n "$i" ]]; do
        [ -n "$i" ] || continue          # blank line: gap without residues
        ((j=j+1))
        printf -v J '%02d' "$j"
        # tr sets are quoted: unquoted [A-Z]/[a-z] are glob patterns and
        # would expand to any single-letter file name in this directory.
        printf '%s\n' "$i" | tr 'A-Z' 'a-z' | sed "s/-/ /g" | awk '{for(i=1; i<=NF; i++) print $i}' > "gap.$J"
        sed -i -e "s/his/hse/g" -e "s/hid/hsd/g" -e "s/hie/hse/g" -e "s/hip/hsp/g" "gap.$J"
    done < gaps.res

    # build the PDB gaps using alltrans

    orac_exe=$orac_home/src/GNU-FFTW-OMP/orac
    j=0
    ngaps=$(wc -l < gaps)                # not used here; kept for later code
    for i in gap.*; do
        [ -e "$i" ] || continue          # glob did not match anything
        gap_length=$(wc -l < "$i")
        ((j=j+1))
        printf -v J '%02d' "$j"

        # Residue numbers delimiting gap j (fields 2 and 3 of line j of gaps).
        read -r first last <<<"$(head -n "$j" gaps | tail -n 1 | awk '{print $2, $3}')"
        resf=$(awk -v n="$first" '{if($3==n) print }' seq+gap)
        resl=$(awk -v n="$last" '{if($3==n) print }' seq+gap)
        awk -v n="$first" '{if(int(substr($0,23,4))==n) print }' rec.pdb > first.pdb
        awk -v n="$last" '{if(int(substr($0,23,4))==n) print }' rec.pdb > last.pdb
        start=$(sed -n "${j}p" gaps | awk '{printf "%s", $2}')
        echo "processing gap $j that starts at $start ...."
        nam=${i//gap./G}

        # Backbone C, CA and O coordinates of residue $start.  rec.pdb is
        # scanned once (it used to be scanned nine times); each record is
        # reduced to "atomname resnum x y z".  When several records match the
        # same atom (e.g. alternate locations) the first one is used; the
        # former per-coordinate extraction interleaved all matches and
        # scrambled the argument order for cord.awk.
        anchor_atoms=$(awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb | grep " $start ")
        read -r xC yC zC    <<<"$(grep "C "  <<<"$anchor_atoms" | awk '{print $3, $4, $5}')"
        read -r xCA yCA zCA <<<"$(grep "CA " <<<"$anchor_atoms" | awk '{print $3, $4, $5}')"
        read -r xO yO zO    <<<"$(grep "O "  <<<"$anchor_atoms" | awk '{print $3, $4, $5}')"

        # $origin is intentionally left unquoted below: it holds several
        # numbers that must be passed as separate words.
        origin=$(echo $xC $yC $zC $xCA $yCA $zCA $xO $yO $zO | ./cord.awk)
        echo " x y z coordinates of the carbonyl carbon"  $origin
        "$orac_home/tools/alltrans/alltrans" $start $origin < "$i" > "$nam.pdb"

        # Template = first anchor residue + built gap + last anchor residue,
        # for the default build and for the three rotated variants
        # (the fort.92 / fort.272 variants are disabled).
        cat first.pdb "$nam.pdb" last.pdb > "G$nam.pdb"
        for rot in 90 180 270; do
            cat first.pdb "fort.$rot" last.pdb > "G${rot}${nam}.pdb"
        done

        # Sub-sequence: anchor, gap residues, anchor.  $resf/$resl are echoed
        # unquoted on purpose so that runs of blanks collapse as before.
        {
            echo $resf
            cat "gap.$J"
            echo $resl
        } > "subseq.$J"

        # prepare GAP?.in files for join_gap.bash
        # Everything from "#START_TEMPLATE" on is copied from steer_tmp.in;
        # the sub-sequence lines (those read before the marker) are inserted
        # right after the "JOIN SOLUTE" line.
        awk '{i++;res[i]=$0; if($1=="#START_TEMPLATE") {ok=1;n=i-1}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE") {for(j=1; j<=n; j++){print res[j]} }}' "subseq.$J" steer_tmp.in > "GAP$J.in"
        nampdb="G$nam.pdb"
        sed -i "s/gappdb/$nampdb/g" "GAP$J.in"
        if [ -s fort.101 ]; then
            # First field of fort.101 is replaced by "ala" in the input
            # (presumably a residue name alltrans did not know -- confirm).
            unk=$(awk '{print $1}' fort.101)
            sed -i "s/$unk/ala/g" "GAP$J.in"
        fi

        "$orac_exe" < "GAP$J.in" > orac.out
        grep NaN orac.out > test_file
        if [ -s test_file ]; then
            # try with 180 rotation
            sed -i "s/GG$J.pdb/G180G$J.pdb/g" "GAP$J.in"
            "$orac_exe" < "GAP$J.in" > orac.out
        fi

        # Indices read from 1.pdb: the C atom of residue ng-1, the N atom of
        # residue ng, and the first/last ATOM record positions of residue 1
        # and of residue ng (ng = number of lines in the sub-sequence).
        ng=$(wc -l < "subseq.$J")
        atm1=$(awk -v n="$ng" '{if($3=="C" &&  $5 == n-1) print $2}' 1.pdb)
        atm2=$(awk -v n="$ng" '{if($3=="N" &&  $5 == n) print $2}' 1.pdb)
        fr=$(grep ATOM 1.pdb | awk '{if(int(substr($0,23,4)) == 1 ) {i++; a[i]=NR}}END{print a[1],a[i]}')
        lr=$(grep ATOM 1.pdb | awk -v n="$ng" '{if(int(substr($0,23,4))==n) {i++; a[i]=NR}}END{print a[1],a[i]}')
        sed -i "s/fr1 fr2/$fr/g" "GAP$J.in"
        sed -i "s/lr1 lr2/$lr/g" "GAP$J.in"
        sed -i "s/atm1/$atm1/g" "GAP$J.in"
        sed -i "s/atm2/$atm2/g" "GAP$J.in"

        # Longer gaps (more than 30 residues) get a longer TIME value.
        if (( gap_length > 30 )); then
            steer_time=1500.0
        else
            steer_time=500.0
        fi
        sed -i -e "s/\#toremove//g" \
               -e "s/ 10.0 OPEN/ 1.0 OPEN/g" \
               -e "s/TIME 1.0/TIME $steer_time/g" "GAP$J.in"
        # The former "#checkdist" start-distance probe is no longer needed
        # when steer is done by run_minimize.
    done
fi

