#!/bin/bash
#  Downloads a PDB file from the PDB, finds gaps, hetero atoms, metals and chains
#  in it, and prints the relevant info.
# 
# syntax: 
#   ./pdbrestore.bash [-f] pdbcode
#    "pdbcode" is the alphanumeric code of the PDB file
#    if the -f option is used, nothing is fetched from the PDB; the file for
#    pdbcode=xxxx [ i.e. pdbxxxx.ent] is assumed to be local
#
export LC_NUMERIC="en_US.UTF-8"
# Called without any argument: show the help text and leave.
if (( $# == 0 )); then
    usage_lines=(
	" "
	" This script restores a raw PDB file (add hydrogens and fill gaps) "
	" "
	" Syntax: pdbrestore.bash [opt] pdbcode"
	"          where pdbcode is a four chars coding the PDB structure"
	" Options:                                                                                 "
	"        -f "
	"          if specified PDB file is in the localdir; else is taken from the PDB "
	"        -s "
	"          do only check and writes gaps (if any) in gaps.OUT "
	"        -z "
	"          zip the final PDB fixed file(s) "
	"        -w model "
	"          generate solvated box with selected water model (opc,spc,tip3)  "
	"        -r "
	"          clean after execution"
	"        -R "
	"          clean before execution and EXIT"
	"        -l lig "
	"          'lig' is the LIGAND in the PDB file (default is none)"
	"        -c chain "
	"          'chain' is the selected chain (default is 'A')"
	"        -g gaps "
	"           'gaps' indicates the selected gap  form gaps.OUT (default is 'NONE')"
	"           e.g. ' pdbrestore.bash -g 3 pdbcode' "
	"                  fix the 3rd gap as specified in gaps.OUT"
	"           if 'gaps' is 'ALL' all gaps are selected"
	""
    )
    # One help line per array element, printed verbatim.
    printf '%s\n' "${usage_lines[@]}"
    exit
fi
# Locate the ORAC installation from the `orac` executable found on PATH:
# orac_home is that path with every "bin/orac" removed, and the input
# templates are read from $orac_home/tools/templates.
orac_bin=$(command -v orac)
orac_home=$(echo $orac_bin | sed "s?bin/orac??g")
tdir=$orac_home/tools/templates
# Number of ligand copies found by ligands(); 0 until a ligand is processed.
nlig=0
# Element symbols treated as metals. The metal loops further down append the
# matching HETATM records once per entry of this list, so every symbol must
# appear exactly once (a repeated symbol would insert the same ion twice).
metals="BE CU NI PT ZN CO PD AG CR FE MG V MN HG CD YB SN PB EU SR SM BA RA AL TL Y LA CE PR ND GD TB DY ER TM LU HF ZR U PU TH"
# Residue names of known cofactors (not referenced in the visible part of
# this script).
cofactors="HEM HEA HEC HED HEF HEO BCL BPH"
# Option defaults; each one is overridden by the matching command-line flag.
local=0          # -f : 1 = use a local PDB file instead of downloading it
water=0          # -w : water model name, 0 = no solvation
check=0          # -s : 1 = only scan the PDB file and report
chain="A"        # -c : selected chain
unk="none"       # unknown residue name read from fort.101, if any
lig="NONE"       # -l : ligand residue name
dogaps="NONE"    # -g : gap number, "ALL" or "NONE"
zip=0            # -z : 1 = compress the result
clean=0          # -r : 1 = clean after execution
clean_exit=0     # -R : 1 = clean and exit

# Parse the command line.
#   Arguments: the script's own arguments ("$@").
#   Sets the globals local, check, water, zip, clean, clean_exit, lig, chain
#   and dogaps according to the flags, and 'code' to the LAST argument.
#   An unknown flag prints "invalid option -X" and exits (status unchanged
#   from before); a flag that lacks its required argument now prints a
#   message on stderr and exits with status 1 instead of being silently
#   ignored.
parse_options() {
    # OPTIND is made local so that the function can be called more than once.
    local opt OPTIND=1
    while getopts ":fsl:zrw:Rc:g:" opt; do
	case $opt in
	    f)
		local=1
		;;
	    s)
		check=1
		;;
	    w)
		water=$OPTARG
		;;
	    z)
		zip=1
		;;
	    r)
		clean=1
		;;
	    R)
		clean_exit=1
		;;
	    l)
		lig=$OPTARG
		;;
	    c)
		chain=$OPTARG
		;;
	    g)
		dogaps=$OPTARG
		;;
	    :)
		# The leading ':' in the optstring makes getopts report a
		# missing argument as ':' with the flag letter in OPTARG.
		echo "option -$OPTARG requires an argument" >&2
		exit 1
		;;
	    \?)
		echo "invalid option"  "-"$OPTARG
		exit
		;;
	esac
    done
    # The PDB code is whatever comes last on the command line.
    code=${!#}
}
parse_options "$@"
echo "pdbrestore started with: chain=$chain |  gaps=$dogaps | ligand=$lig"

# -R: wipe the files produced by a previous run in the current directory,
# then stop without doing anything else.
case $clean_exit in
    1)
	rm -fr *.pdb  *.out  tmp.disulf prima GAP*.out fort.*  *autopsf* x.? rec.psf *.OUT seq.* subseq* SEQ* *tmp*.* 0.in 00.in gap* test* GAP* lig*.* NaN*
	exit
	;;
esac
# Drop the 'error' marker of an earlier run; stay quiet if there is none.
rm error &> /dev/null

# function ligand(s) 
# ligands RESNAME
#   Extracts from $code.pdb (first model only) the records whose residue
#   name is RESNAME and whose chain is $chain or blank, writes them to
#   lig.pdb and counts the copies in the global 'nlig'. Each copy is run
#   through primadorac.bash inside ./prima; on success lig-p.tpg, lig-p.prm
#   and lig-p<N>.pdb are left in the current directory.
#   Returns early, with a message, when no such residue is found.
ligands() {
    awk -v c=$chain -v l=$1 '
	$1 == "ENDMDL" { exit }
	{
	    ch = substr($0, 22, 1)
	    if ((ch == c || ch == " ") && substr($0, 18, 3) == l) print
	}' $code.pdb > lig.pdb
    if [ ! -s lig.pdb ]; then
	echo " no ligand $1 found ; program continues.."
	return
    fi

    # Count the residue-number changes, i.e. the number of ligand copies.
    nlig=$(awk '{cur = substr($0, 23, 4); if (cur != prev) { count++ }; prev = cur} END {print count}' lig.pdb)

    # One x??.pdb file per ligand copy.
    rm x??.pdb >& /dev/null
    if [ $nlig != "1" ]; then
	# Atom count of one copy: the last count printed at a residue change.
	nat_lig=$(awk '{cur = substr($0, 23, 4); if (cur != prev) {print natoms; prev = cur; natoms = 0}; natoms++}' lig.pdb | tail -1)
	split -$nat_lig --additional-suffix=".pdb" lig.pdb
    else
	cp lig.pdb xaa.pdb
    fi

    j=0
    for piece in $(ls x??.pdb); do
	j=$((j + 1))
	# Start every copy from a fresh scratch directory.
	rm -fr prima >& /dev/null
	[ -d prima ] || mkdir prima
	cd prima
	# Separate the organic part from MG/FE ions (element symbol = last field).
	awk -v i1="MG" -v i2="FE" '$NF != i1 && $NF != i2' ../$piece > lig.pdb
	awk -v i1="MG" -v i2="FE" '$NF == i1 || $NF == i2 { print $NF }' ../$piece > ion-type

	if [ -s ion-type ]; then  # magnesium or iron found in ligand/cofactor
	    charge=$(primadorac.bash  -gspn lig.pdb  | awk '{c=$4-2; if(c>=0) {print "+"c} else {print c}}')
	    # remove the "hn" hydrogens listed in lig-p.tpg from lig-p.pdb
	    awk 'NF == 3 && $2 == "hn" { print $1 }' lig-p.tpg > h_atoms
	    for hname in $(cat h_atoms); do
		grep -v $hname lig-p.pdb > tmp.pdb
		mv tmp.pdb lig-p.pdb
	    done
	    mv lig-p.pdb lig$charge.pdb # updated BLC/HEM
	    primadorac.bash -ghsn lig$charge.pdb  >& prima.out
	    stem=lig$charge
	    with_ion=1
	else
	    primadorac.bash  -gspn lig.pdb >& prima.out
	    stem=lig
	    with_ion=0
	fi

	# The minimized structure exists only when primadorac succeeded.
	if [ -f ${stem}-min-p.pdb ]; then
	    if [ $with_ion == 1 ]; then
		# symmetrize charge on metal coordinating nitrogens
		nb_charge=$(awk '{if(NF==3 && $2 == "nb") {c+=$3 } }END{print c/4}' ${stem}-p.tpg)
		awk -v c=$nb_charge '{if(NF==3 && $2 == "nb") {print $1,$2,c }else{print}}' ${stem}-p.tpg > ../lig-p.tpg
	    else
		cp lig-p.tpg ../lig-p.tpg
	    fi
	    cp ${stem}-p.prm ../lig-p.prm
	    cp ${stem}-p.pdb ../lig-p$j.pdb
	    # Replace " 1 " by the copy index in the ligand PDB.
	    sed -i "s/ 1 / $j /g"  ../lig-p$j.pdb
	    echo "PrimaDORAC for $1 OK"
	    echo "PrimaDORAC OK" > prima_ok
	else
	    echo "primadorac Failed. Output follows"
	    echo $(head -1 prima.out)
	    echo " no ligand will be inserted in the fixed PDB"
	fi
	cd ../
    done

    # Rename the residue in the topology to "lig".
    if [ -s lig-p.tpg ]; then
	awk '$1 == "RESIDUE" { print "RESIDUE lig"; next } { print }' lig-p.tpg > tmp.tpg
	mv tmp.tpg lig-p.tpg
    fi
}
# function solvate (called if water != 0) 
# solvate INPUT_TEMPLATE WATER_MODEL
#   Builds md.in from INPUT_TEMPLATE using the box parameters found in the
#   setup.tmp / potential.tmp / solvent.tmp files present after 'orient' has
#   run, launches orac on it and, if md_1.pdb yields an atom count, stores
#   its tail in ${code}_fixed+wat.pdb.
solvate() {
    input_file=$1
    watermodel=$2
    grep -v REM ${code}_fixed.pdb > tmp.pdb
    orient

    # Box and grid parameters picked from the *.tmp files.
    boxsize=$(awk '/CRYSTAL/ {print $2}' setup.tmp)
    linked=$(awk '/LINKED_CELL/ {print $2}' potential.tmp)
    ngrid=$(awk '/EWALD/ {print $4}' potential.tmp)
    # Value read from GENERATE, reduced by the integer part of 8%.
    nwat=$(awk '/GENERATE/ {print $2 - int(0.08*$2)}' solvent.tmp)
    nmolw=$(echo $nwat | awk '{print $1^3}')
    if [ $nwat -gt  32 ]; then
	water=0
	echo "<br> <span style="color:#FF0000"> number of water: $nmolw  </span>"
	echo "<br> <span style="color:#FF0000"> Solvated MD exceeds max allowed mol </span>"
    fi

    # Enable the "#toinsert" lines, drop the ones ending in "#toremove".
    sed "s/#toinsert//g" $input_file | awk '$NF != "#toremove"' > md.in
    # Fill in the placeholders, one in-place substitution at a time.
    for rule in "s/BOXSIZE/$boxsize/g" "s/NLINK/$linked/g" "s/NWAT/$nwat/g" \
		"s/NGRID/$ngrid/g" "s/watermodel/$watermodel/g" \
		"s/pdbcode/${code}_fixed.pdb/g"; do
	sed -i "$rule" md.in
    done

    cp $orac_home/pdb/water.pdb .
    rm *.tmp              # purge orient files
    echo "launching NPT simulatioon of solvated $code. It may takes some time"
    $orac_home/src/GNU-FFTW-OMP/orac < md.in > md.out

    # Second field of the next-to-last line of md_1.pdb, plus 2, is the
    # number of trailing lines to keep.
    n=$(tail -2 md_1.pdb | head -1 | awk '{print $2+2}')
    if [[ -n $n ]]; then
	tail -$n md_1.pdb > ${code}_fixed+wat.pdb
    fi
}

# chain_summary PDBFILE
#   Prints one "    chain X <count>" line for every chain identifier
#   (column 22 of the ATOM records, first model only) that occurs in
#   PDBFILE, followed by a "    nochain  <count>" line for ATOM records with
#   a blank chain identifier. Upper-case letters are reported first, then
#   lower-case letters, then digits.
chain_summary() {
    awk '
	$1 == "ENDMDL" { exit }
	$1 ~ /^ATOM/ { n[substr($0, 22, 1)]++ }
	END {
	    ids = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
	    for (k = 1; k <= length(ids); k++) {
		c = substr(ids, k, 1)
		if (n[c] != 0) print "    chain " c, n[c]
	    }
	    if (n[" "] != 0) print "    nochain ", n[" "]
	}' "$1"
}

# -s (check only): obtain the PDB file, print a report and exit.
if [ "$check" == 1 ]; then
    if [ "$local" != 1 ] ; then
	# no -f: fetch the entry from the PDB archive and unpack it as $code.pdb
	wget  ftp://ftp.ebi.ac.uk/pub/databases/pdb/data/structures/all/pdb/pdb${code}.ent.gz;
	gzip -d pdb${code}.ent.gz ; mv   pdb${code}.ent ${code}.pdb
    else
	# -f: the last argument names a local file that must end in ".pdb".
	pdbfile=${code##*/}      # file name without any directory part
	extf=${pdbfile: -4}      # last four characters (empty if shorter)
	if [ "$extf" != ".pdb" ] ; then
	    echo  "Not a pdb file"
	    exit
	fi
	# Bring the file into the working directory unless it is already there.
	if [ ! "$code" -ef "./$pdbfile" ]; then
	    mv "$code" .
	fi
	code=${pdbfile%.pdb}
    fi
    if [ ! -s "$code.pdb" ] ; then
	echo "PDBfile" $code "is empty; check the spelling"
	exit
    fi


    # scanning for general info
    echo ""
    echo "== GENERAL SECTION  "
    ( printf "    NRESIDUE "; grep "ATOM " "${code}.pdb" | grep " CA " | wc -l  )
    awk '/TITLE/{print "   ", $0}' "${code}.pdb"
    awk '/DOI/{print "   ", $0}' "${code}.pdb"

    echo ""
    echo " == GAP SECTION "

    # The gap report goes both to gaps.OUT and to the terminal.
    findgp.awk  "${code}.pdb"   > gaps.OUT
    findgp.awk  "${code}.pdb"

    # search for hetero and metals
    echo ""

    echo "== HETERO ATOMS SECTION "

    hetero.awk  "${code}.pdb"

    # scan for chains info
    echo ""
    echo "== CHAIN SECTION"
    chain_summary "${code}.pdb"
    exit
fi

##########################################################
# START the EXECUTION after choices (fix_pdb_code.bash) 
##########################################################

# The PDB file must already sit in the current directory.
if [ ! -f $code.pdb ]; then
    echo "wrong or missing PDB file " $code.pdb
    printf '%s\n' \
	"syntax: " \
	"       pdbrestore.bash [ opt ] pdbcode " \
	"       where pdbcode is a PDB file that MUST reside in the current dir" \
	"       Issue 'pdbrestore.bash -s pdbcode' and then try again" \
	"Example: " \
	"       pdbrestore.bash -s pdbcode " \
	"       pdbrestore.bash -c A -l LIG pdbcode "
    exit
fi

# Park the files that must survive the cleanup under names that none of the
# patterns below match.
mv gaps.OUT savedgaps &> /dev/null
mv $code.pdb codepdbsaved
[ -f  $code.OUT ] && mv $code.OUT codeoutsaved

# clean
rm -fr *.pdb  *.out  tmp.disulf prima GAP*.out fort.*  *autopsf* x.? rec.psf *.OUT seq* subseq* SEQ* *tmp*.* 0.in 00.in gap* test* GAP* lig*.* NaN* metals.* tmp.*

# Put the parked files back.
[ -f codeoutsaved ] && mv codeoutsaved $code.OUT
mv codepdbsaved $code.pdb
mv savedgaps gaps.OUT &> /dev/null

# Steering template with the ORAC_HOME placeholder resolved.
sed "s?ORAC_HOME?$orac_home?g" $tdir/steer_template.in  > steer_tmp.in

# isolate selected chain (protein only)
sequence.bash -l -c $chain $code

# reset gaps if sequence is aligned with no gaps (due to e.g. insertions)
[ -s gaps ] || dogaps="NONE"

# Nothing can be done without the SEQ file.
if [ ! -f SEQ ]; then
    exit
fi

# fix ligand (if any)
# "NONE" is the default of -l and means that no ligand was requested, so the
# ligand machinery is run only for a real residue name.

if [ -n "$lig" ] && [ "$lig" != "NONE" ]; then
    ligands "$lig"
fi

# Residue list for ORAC: lower-case residue name, " !", then the second and
# third columns of SEQ.
awk '{print tolower($1), " !",$2,$3}' SEQ  > seq+gap
sed -i "s/his/hse/g" seq+gap # fix histidines
sed -i "s/hip/hsp/g" seq+gap # fix protonated histidines
# Number of lines printed by check_disulphide.awk for rec.pdb.
# NOTE(review): 'disl' is not read anywhere in the visible script - confirm
# it can be dropped.
disl=`check_disulphide.awk rec.pdb | wc | awk '{print $1}'`

# disulfur bridge are checked in join_gaps and join_all_gaps
sed -i "s/cys/cysh/g" seq+gap # fix cysteines
    
# generates the gaps
# Runs only when a gap was requested (-g) and the 'gaps' file is not empty.
# For every gap listed in 'gaps' it builds the missing residues as a PDB
# fragment (several starting orientations), writes the matching sub-sequence
# and an ORAC input GAP<NN>.in, and runs a first orac job on it.
if [ $dogaps != "NONE" -a -s gaps ] ; then   
    echo "building gap PDB-templates...."
    # 'gaps' is read first: every GAP line stores its 2nd and 3rd field as the
    # gap bounds. For the SEQ lines that follow, "-<name>" is printed for each
    # residue whose number lies strictly between the bounds and a newline when
    # the upper bound is reached: gaps.res gets one line per gap.
    cat gaps SEQ | awk '{if($1=="GAP") { igap++; g1[igap]=$2; g2[igap]=$3} else {for(j=1; j<=igap; j++) { if($2>g1[j] && $2 <g2[j]) printf "-" $1; if($2==g2[j]) printf "\n"}}}' > gaps.res

    ((j=0));
    rm -fr gap.* >&/dev/null
    # gap.<NN>: the residues of gap NN, lower case, one per line, with the
    # histidine names renamed (his->hse, hip->hsp).
    for i in `cat gaps.res` ; do
	((j=j+1));
	J=`echo $j | awk '{printf "%02d",$1}'`
	echo $i | tr [A-Z] [a-z] | sed "s/-/ /g" | awk '{for(i=1; i<=NF; i++) print $i}' > gap.$J ;
	sed -i "s/his/hse/g" gap.$J
	sed -i "s/hip/hsp/g" gap.$J
    done
    # build the PDB gaps using alltrans

    ((j=0))
    # NOTE: ngaps is not read again inside this block.
    ngaps=`wc gaps | awk '{print $1}'`
    for i in gap.* ; do
	# number of residues to be built for this gap
	gap_length=`wc $i | awk '{print $1}'` 
	((j=j+1))
	J=`echo $j | awk '{printf "%02d",$1}'`
	# bounds of the j-th gap (2nd and 3rd field of line j of 'gaps')
	first=`head -$j gaps | tail -1 | awk '{print $2}'`
	last=`head -$j gaps | tail -1 | awk '{print $3}'`
	# sequence lines and atoms of the two residues flanking the gap
	resf=`awk -v n=$first '{if($3==n) print }' seq+gap`
	resl=`awk -v n=$last '{if($3==n) print }' seq+gap`
	awk -v n=$first '{if(int(substr($0,23,4))==n) print }' rec.pdb > first.pdb
	awk -v n=$last '{if(int(substr($0,23,4))==n) print }' rec.pdb > last.pdb
	# same value as $first, read again from line j of 'gaps'
	start=`sed -n "${j}p" gaps | awk '{printf $2}'` 
	echo "processing gap" $j "that starts at "$start "...."
	# gap.01 -> G01
	nam=`echo $i | sed "s/gap\./G/g"` ;
	# x/y/z of the C, CA and O atoms of residue $start: atom name, residue
	# number and coordinates are cut out of rec.pdb and then filtered by
	# grep on the residue number and on the atom name.
	xC=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb   | grep " $start " | grep "C " | awk '{print $3}'`
	yC=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "C " | awk '{print $4}'`
	zC=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "C " | awk '{print $5}'`
	xCA=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "CA " | awk '{print $3}'`
	yCA=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "CA " | awk '{print $4}'`
	zCA=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "CA " | awk '{print $5}'`
	xO=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "O " | awk '{print $3}'`
	yO=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "O " | awk '{print $4}'`
	zO=`awk '{print substr($0,14,4),substr($0,23,4),substr($0,27,30)}' rec.pdb  | grep " $start " | grep "O " | awk '{print $5}'`
	#   echo "   x,y,z coordinates of terminal C ->" $xC $yC $zC
        #   echo "   x,y,z coordinates of terminal O ->" $xO $yO $zO
	#   echo "   x,y,z coordinates of terminal CA ->" $xCA $yCA $zCA 
	# cord.awk turns the nine coordinates into the 'origin' handed to
	# alltrans (presumably the position where the new chain starts - see
	# cord.awk).
	origin=`echo $xC $yC $zC $xCA $yCA $zCA $xO $yO $zO | cord.awk` 
	#   echo $xC $yC $zC $xCA $yCA $zCA $xO $yO $zO | cord.awk
	echo " x y z coordinates of the carbonyl carbon"  $origin
	# alltrans reads the residue list of the gap and writes the fragment
	# to stdout. The fort.90/180/270/92/272 files used below are assumed to
	# be alternative orientations written by alltrans - TODO confirm.
	alltrans $start $origin < $i > $nam.pdb
	# Each candidate: first flanking residue + built fragment + last
	# flanking residue.
	cat first.pdb $nam.pdb last.pdb > G$nam.pdb
	cat first.pdb fort.90 last.pdb >  G90$nam.pdb
	cat first.pdb fort.180 last.pdb >  G180$nam.pdb
	cat first.pdb fort.270 last.pdb >  G270$nam.pdb
	cat first.pdb fort.92 last.pdb >  G92$nam.pdb
	cat first.pdb fort.272 last.pdb >  G272$nam.pdb
	# subseq.<NN>: flanking residue, gap residues, flanking residue
	echo $resf > subseq.$J
	cat gap.$J >> subseq.$J
	echo $resl >> subseq.$J
#       prepare GAP?.in files for join_gap.bash
	# Keep the template from the "#START_TEMPLATE" line on and insert the
	# lines that precede that marker (the sub-sequence) right after the
	# "JOIN SOLUTE" line.
	cat subseq.$J steer_tmp.in | awk '{i++;res[i]=$0; if($1=="#START_TEMPLATE") {ok=1;n=i-1}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE") {for(j=1; j<=n; j++){print res[j]} }}' > GAP$J.in
	nampdb="G"$nam.pdb
	sed -i "s/gappdb/$nampdb/g" GAP$J.in
	# A non-empty fort.101 holds the name of an unknown residue: it is
	# replaced by ala in the input.
	if [ -s fort.101 ] ; then
	    unk=`awk '{print $1}' fort.101`
	    sed -i "s/$unk/ala/g" GAP$J.in
	fi
	orac < GAP$J.in >& orac.out
	# If the first run produced NaN, retry from the G180 start structure.
	grep NaN orac.out > test_file
	if [ -s test_file ]; then
	    sed  -i "s/GG$J.pdb/G180G$J.pdb/g" GAP$J.in
	    orac < GAP$J.in >& orac.out
	fi
	# ng = number of residues in the sub-sequence (gap + two flanking ones)
	ng=`wc subseq.$J | awk '{print $1}'` 
	# atom numbers (2nd field of 1.pdb) of the C atom of residue ng-1 and
	# of the N atom of residue ng
	atm1=`awk -v n=$ng '{if($3=="C" &&  $5 == n-1) print}' 1.pdb | awk '{print $2}'`
	atm2=`awk -v n=$ng '{if($3=="N" &&  $5 == n) print}' 1.pdb | awk '{print $2}'`
	# first and last ATOM-line index of residue 1 (fr) and of residue ng (lr)
	fr=`grep ATOM 1.pdb | awk '{if(int(substr($0,23,4)) == 1 ) {i++; a[i]=NR}}END{print a[1],a[i]}'` 
	lr=`grep ATOM 1.pdb | awk -v n=$ng '{if(int(substr($0,23,4))==n) {i++; a[i]=NR}}END{print a[1],a[i]}'`
	# fill the remaining placeholders of GAP<NN>.in
	sed -i "s/fr1 fr2/$fr/g" GAP$J.in
	sed -i "s/lr1 lr2/$lr/g" GAP$J.in
	sed -i "s/atm1/$atm1/g" GAP$J.in
	sed -i "s/atm2/$atm2/g" GAP$J.in
	sed -i "s/\#toremove//g" GAP$J.in
	sed -i "s/ 10.0 OPEN/ 1.0 OPEN/g" GAP$J.in
	# gaps longer than 30 residues get a longer TIME value
	if (( $gap_length > 30 ))  ; then
	    sed -i "s/TIME 1.0/TIME 1500.0/g" GAP$J.in
	else
	    sed -i "s/TIME 1.0/TIME 500.0/g" GAP$J.in
	fi
    done
fi    
##########################################################
# CONTINUE the EXECUTION after choices (join_gap.bash) 
##########################################################
# isolate selected chain (protein only)

# chain.pdb must exist and be non-empty at this point (it is not created in
# this script; presumably sequence.bash writes it - TODO confirm).
if [ ! -s chain.pdb ]; then
    echo "ERROR  chain.pdb is empty"
    exit
fi

sed "s?ORAC_HOME?$orac_home?g" $tdir/template_0.in  > tmp.in 
# No gap requested: build 0.in for the structure as it is, run orac and write
# ${code}_fixed.pdb. This branch always ends with 'exit'.
if [ $dogaps == "NONE" ]; then
    # residue list from SEQ.full without the "xxx" entries
    awk '{print tolower($1), " !", $2}' SEQ.full | grep -v "xxx"  > seq.with-gap
    sed -i "s/his/hse/g" seq.with-gap  # fix histidines
    sed -i "s/hip/hsp/g" seq.with-gap  # fix histidines
    sed -i "s/cys/cysh/g" seq.with-gap # fix cysteines (momentarily)
    nresgap=`wc seq.with-gap | awk '{print $1}'`
# fix termini 
    # first residue gets the "-h" suffix, last residue the "-o" suffix
    awk -v n=$nresgap '{if(NR==1) {print $1"-h", " ! ",$3} else if(NR==n)  {print $1"-o", " ! ",$3} else {print}}' seq.with-gap > tmp; mv tmp seq.with-gap

    # Keep the template from the line whose 2nd field is "sequence" on and
    # insert the residue list right after the "JOIN SOLUTE" line.
    cat seq.with-gap  tmp.in | awk '{i++;res[i]=$0; if($2=="sequence") {ok=1;n=i-1}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE") {for(j=1; j<=n; j++){print res[j]} }}' > 0.in
    sed -i "s/rec+gap/rec/g" 0.in
    fix_cys.bash 0.in rec.pdb

    #get pdb coordinates of metals
    # HETATM records of the first model, in the selected (or blank) chain,
    # whose last field is one of the symbols in $metals
    rm metals.pdb >& /dev/null
    for i in $metals ; do
	awk -v c=$chain -v mtl=$i '{if($1=="ENDMDL") {exit}; if(( substr($0,22,1)==c || substr($0,22,1)==" " ) && $1~"^HETATM" && $NF==mtl)  print }' $code.pdb >> metals.pdb
    done

#get residue  type of metals
    # same selection, but only the lower-cased symbol is kept
    rm metals.res >& /dev/null
    for i in  $metals ; do 
	awk -v c=$chain -v mtl=$i '{if($1=="ENDMDL") {exit}; if(( substr($0,22,1)==c || substr($0,22,1)==" " ) && $1~"^HETATM" && $NF==mtl)  print $NF }' $code.pdb | awk '{print tolower($1)}' >> metals.res
    done
    
    if [ -s metals.pdb ] ; then
	echo "fixing metals .. "
	cat rec.pdb metals.pdb| grep -v "END" > tmp.pdb; mv tmp.pdb rec.pdb
	# sed 'e' (a GNU sed command) runs "cat metals.res" at the matching line
	sed $'/endsolute/{e cat metals\.res\n}' 0.in > tmp.in # insert residue metal types BEFORE 'endsolute' in template 
	mv tmp.in 0.in
    fi
    
#   fix the ligand as last residue to avoid problems with ADD_TPG and residue numbering    
    # only when ligands() produced lig-p1.pdb and left the prima_ok marker
    if [ -s lig-p1.pdb -a -s prima/prima_ok ] ; then
	echo "fixing ligand $lig"
	cat rec.pdb lig-p?.pdb| grep -v "END" > tmp.pdb; mv tmp.pdb rec.pdb
	#insert ligand(s) after before 'endsolute' in template 
	# one "lig" line per ligand copy
	for i in $(seq 1 $nlig); do
	    sed -i '/endsolute/i lig ' 0.in
	done
	cp 0.in tmp.in
	# make ORAC read the ligand topology and parameters
	awk '{if ($1=="&PARAMETERS") {print $0; print "   READ_TPG_ASCII lig-p.tpg"; print "   READ_PRM_ASCII lig-p.prm"} else {print}}' tmp.in > 0.in
    fi
# now transfor all CYS-binding metals from cysh to cysm
    if [ -s metals.pdb ] ; then
	fix_cys_zn.bash 0.in rec.pdb
    fi
    # run settings for the final job
    sed -i "s/TIME 1.0/TIME 150.0/g" 0.in   
    sed -i "s/1.0 OPEN 2.pdb/15.0 OPEN 2.pdb/g" 0.in   
    sed -i "s/THREADS 1/THREADS 6/g" 0.in   
    orac < 0.in >& tmp.out
    # success = "COMPLETED" found, 2.pdb non-empty and no NaN in the output
    grep "COMPLETED" tmp.out > test_file
    grep "NaN" tmp.out > NaN_file
    if [ -s test_file -a -s 2.pdb -a ! -s NaN_file ] ; then
	echo " " 
	tail -35 tmp.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
	echo " " 
	# 2nd field of the next-to-last line of 2.pdb, plus 2, is the number
	# of trailing lines copied into the fixed PDB
	n=`tail -2 2.pdb   | head -1 | awk '{print $2+2}'`
	rm ${code}_fixed.pdb >& /dev/null
	tail -$n 2.pdb > ${code}_fixed.pdb
	# residue number of the first record of chain.pdb
	# NOTE(review): sr is zero-padded to two digits, so the string test
	# against 1 below is true even when the first residue is 1; the awk
	# that follows keeps only ATOM lines.
	sr=`head -1 chain.pdb | awk '{printf "%02d\n", substr($0,23,4)}'`
	if [ $sr != 1 ]; then
	    # shift the residue numbers so that the first one is sr again
	    awk -v sr=$sr '{if($1=="ATOM") {printf "%22s",substr($0,1,22); printf "%4d",$5+sr-1; printf "%28s\n",substr($0,27,28)}}' ${code}_fixed.pdb > tmp.pdb;
	    mv tmp.pdb ${code}_fixed.pdb
	fi
	if [ $water != 0 ] ; then
	    solvate 0.in $water 
	fi
	if [ $zip == 1 ]; then 
	    gzip -f ${code}_fixed.pdb
	fi
	echo "PDB fixed!! " 
    else
	ls -ltr test_file
	echo " Something went awry. So sorry..." 
    fi
    exit
fi
# THE FOLLOWING  IS PROCESSED WHEN GAPS != 0  

# prepare 0.in from template with current gap or ALL gaps

# Number of gaps to process: every line of 'gaps' with -g ALL, otherwise
# just the single gap selected with -g.
if [ $dogaps == "ALL" ]; then
    ngaps=`wc gaps | awk '{print $1}'`
else
    ngaps=1
fi
# For each gap: try the candidate start structures one after the other until
# one of them survives the full minimization of the joined chain.
for j in $(seq 1 $ngaps); do
    J=`echo $j | awk '{printf "%02d",$1}'`
    # gap_n: two-digit index of the gap being processed
    if [ $dogaps == "ALL" ] ; then 
	gap_n=$J
    else
	DOGAPS=`echo $dogaps | awk '{printf "%02d",$1}'`
	gap_n=`echo $DOGAPS | awk '{printf "%02d",$1}'` 
    fi
    # bounds of that gap: 2nd and 3rd field of line gap_n of 'gaps'
    first=`head -$gap_n gaps | tail -1 | awk '{print $2}'`
    last=`head -$gap_n gaps | tail -1 | awk '{print $3}'`
    echo "Doing gap " $gap_n $first $last
    # residue list without the "xxx" entries and without the two residues
    # that flank the gap
    awk '{print tolower($1), " !", $2}' SEQ.full | grep -v "xxx" | awk -v n1=$first -v n2=$last '{if($3!=n1 && $3!=n2) print}'  > seq.with-gap
    nresgap=`wc seq.with-gap | awk '{print $1}'`
    sed -i "s/his/hse/g" seq.with-gap  # fix histidines
    sed -i "s/hip/hsp/g" seq.with-gap  # fix histidines
    sed -i "s/cys/cysh/g" seq.with-gap # fix cysteines (momentarily) 
# fix termini 
    # first residue gets the "-h" suffix, last residue the "-o" suffix
    awk -v n=$nresgap '{if(NR==1) {print $1"-h", " ! ",$3} else if(NR==n)  {print $1"-o" , " ! ",$3} else {print}}' seq.with-gap > tmp; mv tmp seq.with-gap 

    sed "s?ORAC_HOME?$orac_home?g" $tdir/template_0.in  > tmp.in
    # Keep the template from the line whose 2nd field is "sequence" on and
    # insert the residue list right after the "JOIN SOLUTE" line.
    cat seq.with-gap  tmp.in | awk '{i++;res[i]=$0; if($2=="sequence") {ok=1;n=i-1}; if(ok==1) {print}; if($1=="JOIN" && $2=="SOLUTE") {for(j=1; j<=n; j++){print res[j]} }}' > 0.in
    
    # ng (upper gap bound + 1) is handed to insert_seq.awk / insert_gap.awk
    ((ng=$last + 1 ));

# launch orac with SMD on gap

    # jr is only a running label for the messages and file names: it starts
    # at 0 and grows by 90 per candidate (0, 90, 180, 270, 360, 450).
    ((jr=-90));
    for i in GG${gap_n}.pdb  G90G${gap_n}.pdb  G180G${gap_n}.pdb  G270G${gap_n}.pdb G92G${gap_n}.pdb G272G${gap_n}.pdb ; do
	# tmp.in = 0.in with the sub-sequence of the gap inserted (unknown
	# residue replaced by ala when fort.101 is non-empty)
	if [ -s fort.101 ] ; then
	    cat subseq.${gap_n} 0.in | sed "s/$unk/ala/g" | insert_seq.awk -v ng=$ng > tmp.in
	else
	    cat subseq.${gap_n} 0.in | insert_seq.awk -v ng=$ng > tmp.in
	fi
	((jr=jr+90));     
	echo "        testing  orientation " $jr " for gap " $gap_n  
	# gap input for this candidate, with "energy_then_die" commented out
	sed  "s/GG${gap_n}.pdb/$i/g" GAP${gap_n}.in > GAP${J}_$jr.in    
	sed -i "s/energy_then_die/ \! energy_then_die/g" GAP${J}_$jr.in
	rm 1.pdb >& /dev/null
	orac < GAP${J}_$jr.in >& orac.out
	# n stays empty when 1.pdb was not produced
	n=`tail -2 1.pdb | head -1  | awk '{print $2+2}'`
	jump=0
	if [ ! -z $n ]; then  
	    tail -$n 1.pdb | grep -v REMARK | grep -v END> x.pdb 
	else
	    echo "something was wrong with gap ${gap_n}.."
	    jump=1
	fi
#   prepare rec+gap.pdb    
	if [ $jump == 0 ]; then # skip if gap minimization failed 
	# rec.pdb without the two flanking residues ...
	awk -v n=$first '{if(int(substr($0,23,4))!=n) print }' rec.pdb |  awk -v n=$last '{if(int(substr($0,23,4))!=n) print }' > tmp.pdb 

	# ... merged with the built fragment
	cat x.pdb  tmp.pdb | insert_gap.awk -v ng=$ng > rec+gap.pdb
	cp rec+gap.pdb rec+gap$jr.pdb 
#   launch orac on joined sequence    
	fix_cys.bash tmp.in rec+gap.pdb 
#	$orac_home/src/GNU-FFTW-OMP/orac < tmp.in >& tmp.out
	orac < tmp.in >& tmp.out
	# NOTE: check_end is not read again in this block.
	check_end=`grep COMPLETED tmp.out | awk '{print $2}'`
	grep NaN tmp.out > test_file
	if [ ! -s test_file  ] ; then
	    #       No NaN: first test passed. Relaunch orac  full minimizations
	    echo "        No NaN for t=1.0. Launching full minimization"
	    sed -i "s/THREADS 1/THREADS 6/g" tmp.in   
	    sed -i "s/TIME 1.0/TIME 150.0/g" tmp.in   
	    sed -i "s/1.0 OPEN 2.pdb/15.0 OPEN 2.pdb/g" tmp.in   
	    orac < tmp.in >& tmp.out
	fi
	# check that full minimization  run was correct
    
	grep NaN tmp.out > test_file
    
	if [ ! -s test_file  ] ; then
	    #       check for ***** and for stretching OK 	
	    # "1000" flags a 6th field above 2000.0 or starting with '*' on
	    # the Tstep line(s) of the last 35 output lines, "ok" otherwise
	    energy=`tail -35 tmp.out  | grep Tstep | awk '{if($6>2000.0 || substr($6,1,1)=="*") {print "1000"}else {print "ok"}}'`
	    if [[ $energy == "1000" ]] ; then
		echo "        orientation " $jr " failed full minimization"
		echo "        orientation " $jr " failed full minimization" > error
		tail -35 tmp.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
	    else
		if [ ! -z $energy ] ; then
		    # this orientation worked: save its files
		    n=`tail -2 2.pdb   | head -1 | awk '{print $2+2}'`
		    cp tmp.in tmp${gap_n}.in ; cp tmp.out tmp${gap_n}.out ; cp rec+gap.pdb rec+gap${gap_n}.pdb
		    tail -35 tmp.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
		    tail -$n 2.pdb > ${code}_gap${J}_last.pdb
		    echo "        Exiting gap "$J " rotation " $jr
		    cp x.pdb gap_ok${gap_n}.pdb
		    rm error >& /dev/null
		    # with ALL go on to the next gap, otherwise the job is done
		    if [ $dogaps == "ALL" ] ;then
			echo "gap " $j " done"
		        break
		    else
			exit
		    fi
		else
		    # no Tstep line was found in the output
		    echo "       Something went wrong. Output from ORAC follows"
		    tail tmp.out
		    exit
		fi	 	    
	    fi
	else
	    echo "        orientation " $jr " failed full minimization for NaN" 
	fi
	fi # closes if [ $jump == 0  ] 
    done
    # none of the candidates produced a usable fragment for this gap
    if [ ! -s gap_ok${gap_n}.pdb ]; then
	echo "ERROR: could not fix gap " $gap_n"; program stops"
	exit
    fi
done

##########################################################
# CONTINUE the EXECUTION after choices (join_allgaps.bash) 
##########################################################

# Final stage, reached only with -g ALL: merge every accepted gap fragment
# into the structure, add metals and ligand, run orac once more and write
# ${code}_fixed.pdb.
# NOTE(review): 'err' is not assigned anywhere in the visible script.
if [[ ! -s chain.pdb || $err == 1 ]]; then
    echo ".. pdbrestore FAILED"
    exit
fi
cp 0.in tmp.in
sed -i "s/THREADS 1/THREADS 6/g" tmp.in   

# prepare the sequence removing first and last residue for each gap"
# (sequence lines are the three-field lines whose 2nd field is "!")
((j=0))
for i in `ls  gap_ok*.pdb` ; do
    ((j=j+1)); 
    first=`head -$j gaps | tail -1 | awk '{print $2}'`
    last=`head -$j gaps | tail -1 | awk '{print $3}'`
    awk -v n1=$first -v n2=$last '{if(NF==3 && $2=="!" ) {if($3!=n1 && $3!=n2) {print }}else {print}}' tmp.in   > tmp1.in
    mv tmp1.in tmp.in 
done
cp rec.pdb tmp.pdb
((j=0)) 
#   loop on gaps
# The j-th gap_ok file is paired with the j-th line of 'gaps' and with
# subseq.<J>; tmp.pdb and tmp.in accumulate the result.
for i in `ls  gap_ok*.pdb` ; do
    ((j=j+1)); 
    J=`echo $j | awk '{printf "%02d",$1}'`
    first=`head -$j gaps | tail -1 | awk '{print $2}'`
    last=`head -$j gaps | tail -1 | awk '{print $3}'`
    ((ng=$last + 1 ));
    # drop the two flanking residues, then merge the fragment in
    awk -v n=$first '{if(int(substr($0,23,4))!=n) print }' tmp.pdb |  awk -v n=$last '{if(int(substr($0,23,4))!=n) print }' > tmp1.pdb
    cat $i  tmp1.pdb | insert_gap.awk -v ng=$ng > tmp.pdb
    # same for the sequence (unknown residue replaced by ala if fort.101 is
    # non-empty)
    if [ -s fort.101 ] ; then
	cat subseq.$J tmp.in | sed "s/$unk/ala/g" | insert_seq.awk -v ng=$ng > tmp1.in
    else
	cat subseq.$J tmp.in | insert_seq.awk -v ng=$ng > tmp1.in
    fi
    mv tmp1.in tmp.in 
done
mv tmp.pdb rec+gap.pdb
mv tmp.in 00.in
sed -i "s/TIME 1.0/TIME 150.0/g" 00.in   
sed -i "s/1.0 OPEN 2.pdb/15.0 OPEN 2.pdb/g" 00.in   
fix_cys.bash  00.in rec+gap.pdb

# add metals (if any)

rm metals.pdb >& /dev/null
#get pdb coordinates of metals
# HETATM records of the first model, in the selected (or blank) chain, whose
# last field is one of the symbols in $metals
for i in $metals ; do
    awk -v c=$chain -v mtl=$i '{if($1=="ENDMDL") {exit};  if(( substr($0,22,1)==c || substr($0,22,1)==" " ) && $1~"^HETATM" && $NF==mtl)  print }' $code.pdb >> metals.pdb
done

#get residue  type of metals
# same selection, but only the lower-cased symbol is kept
rm metals.res >& /dev/null
for i in $metals ; do 
    awk -v c=$chain -v mtl=$i '{if($1=="ENDMDL") {exit};  if(( substr($0,22,1)==c || substr($0,22,1)==" " ) && $1~"^HETATM" && $NF==mtl)  print  $NF }' $code.pdb | awk '{print tolower($1)}' >> metals.res
done

if [ -s metals.pdb ] ; then
    echo "fixing metals .. "
    cat rec+gap.pdb metals.pdb| grep -v "END" > tmp.pdb; mv tmp.pdb rec+gap.pdb
    # sed 'e' (a GNU sed command) runs "cat metals.res" at the matching line
    sed $'/endsolute/{e cat metals\.res\n}' 00.in > tmp.in # insert residue metal types BEFORE 'endsolute' in template 
    mv tmp.in 00.in
fi

# fix the ligand as last residue to avoid problems with ADD_TPG and residue numbering    

# The ligand is skipped, with a warning, when its lower-cased name equals the
# unknown residue name held in 'unk' and fort.101 is non-empty.
LIG=`echo $lig | awk '{print tolower($1)}'`
if [ -s fort.101 -a $LIG == $unk ] ; then
    echo "!! WARNING  !! WARNING  !! WARNING "
    echo " ligand $lig is also found in the sequence SEQRES and in gaps"
else
    # only when ligands() produced lig-p1.pdb and left the prima_ok marker
    if [ -s lig-p1.pdb -a -s prima/prima_ok ] ; then
	echo "fixing ligand .. "
	cat rec+gap.pdb lig-p?.pdb| grep -v "END" > tmp.pdb; mv tmp.pdb rec+gap.pdb
	#insert ligands after before 'endsolute' in template 
	# one "lig" line per ligand copy
	for i in $(seq 1 $nlig) ; do 
	    sed -i '/endsolute/i lig ' 00.in
	done
	cp 00.in tmp.in
	# make ORAC read the ligand topology and parameters
	awk '{if ($1=="&PARAMETERS") {print $0; print "   READ_TPG_ASCII lig-p.tpg"; print "   READ_PRM_ASCII lig-p.prm"} else {print}}' tmp.in > 00.in
    fi
fi

# now transfor all CYS-binding metals from cysh to cysm
if [ -s metals.pdb ] ; then
    fix_cys_zn.bash 00.in rec+gap.pdb
fi
    
echo "launching orac on $code with all gaps filled.."
orac < 00.in >& 00.out

# 2.pdb non-empty means orac wrote a structure; "COMPLETED" must also appear
# in its output.
if [ -s 2.pdb ] ; then
    echo " "
    grep COMPLETED 00.out > test_file
    if [ ! -s test_file ]; then
	echo " *******************************************"
	echo " ERROR: Program did not complete. Try again "
	echo " *******************************************"
	exit
    fi
    # 2nd field of the next-to-last line of 2.pdb, plus 2, is the number of
    # trailing lines copied into the fixed PDB
    n=`tail -2 2.pdb   | head -1 | awk '{print $2+2}'`
    tail -$n 2.pdb  > ${code}_fixed.pdb
    # residue number of the first record of chain.pdb
    # NOTE(review): sr is zero-padded to two digits, so the string test
    # against 1 below is true even when the first residue is 1; the awk that
    # follows keeps only ATOM lines.
    sr=`head -1 chain.pdb | awk '{printf "%02d\n", substr($0,23,4)}'`
    if [ $sr != 1 ]; then
	# shift the residue numbers so that the first one is sr again
	awk -v sr=$sr '{if($1=="ATOM") {printf "%22s",substr($0,1,22); printf "%4d",$5+sr-1; printf "%28s\n",substr($0,27,28)}}' ${code}_fixed.pdb > tmp.pdb;
	mv tmp.pdb ${code}_fixed.pdb
    fi
    if [ $water != 0 ] ; then
	solvate 00.in $water 
    fi
    
    if [ $zip == 1 ]; then   # compress if required
	# with a ligand, its topology/parameter files go into the archive too
	if [ $lig != "NONE" ]; then
	    cp prima/file.itp lig-g.itp
	    tar zcf ${code}_fixed.tar.gz ${code}_fixed*.pdb lig-[pg].*
	else
	    tar zcf ${code}_fixed.tar.gz ${code}_fixed*.pdb 
	fi
    fi
    echo " -------------------------------------------------------------- "
    echo "        PDB fixed!! Stored in  ${code}_fixed.pdb(.gz)         " 
    tail -35 00.out  | grep Tstep | awk '{print "        Time/Energy = "$3,$6}'
    if [ -s fort.101 ] ; then
	echo " "
	echo "        !! WARNING !! !! WARNING !! !! WARNING !!  "
	echo "        Unknown residue '$unk' mutated into ala"
	rm fort.101
    fi
    echo " -------------------------------------------------------------- "
    # -r: remove the intermediate files, keeping only the input PDB and the
    # fixed PDB (parked under SAVEPDB* names during the cleanup)
    if [ $clean == 1 ]; then
	echo " cleaning..."
	mv ${code}_fixed.pdb SAVEPDB_fixed >& /dev/null
	mv ${code}_fixed.pdb.gz SAVEPDB_fixed.gz >& /dev/null
	mv $code.pdb SAVEPDB
	rm -fr error test* *tmp* *.out gap* GAP* *.in *seq* lig*.* SEQ* *.psf *.pdb fort.* prima >&/dev/null
	mv SAVEPDB_fixed  ${code}_fixed.pdb >& /dev/null
	mv SAVEPDB_fixed.gz  ${code}_fixed.pdb.gz >& /dev/null
	mv SAVEPDB $code.pdb 
    fi
else
    echo "  Something went awry. So sorry...." 
fi
exit


