#!/bin/bash
# 
# This script generates the ORAC input files for a ligand-receptor system.
# 
#

# ---------------------------------------------------------------------------
# Defaults and command-line option parsing.
#   clean : 1 => remove intermediate files at the end (-r)
#   erro  : error tag later read from check_orac_output.awk ("none" initially)
#   chain : PDB chain identifier to extract (-c, default "A")
#   v     : 1 => try to repair residues with vmd, 0 => skip vmd (-n)
#   lig   : ligand residue name (-l). Initialised empty so that a variable of
#           the same name inherited from the environment is never mistaken
#           for a user-supplied ligand.
#   tdir  : template directory, derived from the location of the orac binary
#   odir  : installation root, derived from the location of the orac binary
# ---------------------------------------------------------------------------
clean=0
erro="none"
chain="A"
v=1
lig=""

# `command -v` is a shell builtin, so the lookup does not depend on an
# external `which` being installed. An empty result yields empty tdir/odir.
orac_bin=$(command -v orac)
tdir=$(printf '%s\n' "$orac_bin" | sed "s?bin/orac?tools/templates?g")
odir=$(printf '%s\n' "$orac_bin" | sed "s?bin/orac??g")

# parse options (the positional PDB code is NOT shifted away here; it is
# picked up afterwards as the last command-line word)
while getopts "c:l:rn" opt; do
    case "$opt" in
        r) clean=1 ;;
        c) chain=$OPTARG ;;
        l) lig=$OPTARG ;;
        n) v=0 ;;
        \?) echo "invalid option" ;;
    esac
done


# The PDB code is the LAST command-line word (indirect expansion of $#).
# Options were not shifted, so "$1" is only used to detect a call without
# any argument at all.
filename=${!#}

# No argument: print the help text and stop.
if [[ -z "$1" ]]; then
    echo " " 
    echo " This scripts generates the ORAC input files for a ligand-receptor systems  "
    echo " Syntax: " 
    echo  "$0" " [ -r ] -c CHAIN -l LIG pdbcode" 
    echo " " 
    echo  "    Args:" 
    echo  "         pdbcode is the four digits alphanumeric PDB code of the ligand-target system " 
    echo  "    Options:"
    echo  "         -r  " 
    echo  "          clean directory after execution (only ORAC input files are spared)"  
    echo  "         -c CHAIN   " 
    echo  "          specifies the chain (where CHAIN may be  A,B,C...; default is 'A')"
    echo  "         -l LIGAND   " 
    echo  "          specifies the liganame RESNAME (3 digits)  in the PDB "      
    echo  "         -n   " 
    echo  "          do not use vmd for fixing the protein                 "      
    exit
fi

# The file <pdbcode>.pdb must exist in the current directory. Quoting keeps
# the test well-formed for names containing blanks or glob characters, and
# the failure is reported to the caller through a non-zero exit status.
if [[ ! -f "$filename.pdb" ]]; then
    echo "wrong or missin pdb code"
    echo " Syntax: " 
    echo  "$0" " [ -r ] -c CHAIN -l LIG pdbcode" 
    exit 1
fi

#######################################
# Write the residue sequence of rec.pdb and assemble the ORAC input files
# for the ligand-receptor complex.
#
# Globals:
#   tdir, odir, filename  (read)
#   resname               (written; intentionally NOT local because the
#                          top-level code reuses it when it regenerates the
#                          input file)
# Arguments:
#   $1 - number of residues written per line of the sequence block
# Files read:
#   rec.pdb, lig-p.pdb, lig-p.tpg, lig-p.prm, $tdir/0.in, $tdir/1.in
# Files written:
#   seq.tmp, test.in, tmp.lig, $filename.in, $filename.tpg, $filename.prm
#   and $filename.pdb (overwritten with ligand + receptor coordinates)
# Exit:
#   terminates the whole script with status 1 when the primadorac output
#   files lig-p.{pdb,tpg,prm} are missing or empty.
#######################################
function seqgen {
    local per_line=$1

    # One lower-case residue name per C-alpha record; the first residue gets
    # the "-h" suffix, a line break is emitted every $per_line residues and
    # "-o" is appended after the last residue.
    grep " CA " rec.pdb | awk -v ll="$per_line" '
        BEGIN { nter = "-h"; cter = "-o" }
        {
            i++
            if (NR == 1) { printf "%4s%2s", tolower($4), nter }
            else         { printf "%4s", tolower($4) }
            if (i == ll) { printf "\n"; i = 0 }
        }
        END { printf "%s\n", cter }' > seq.tmp

    if [[ -s lig-p.pdb && -s lig-p.tpg && -s lig-p.prm ]]; then
        # ligand residue name as declared in the primadorac topology
        resname=$(awk '$1 == "RESIDUE" { print $2 }' lig-p.tpg)

        # test.in keeps the raw template; $filename.in has the placeholders
        # resname / PDBF / ORAC_HOME filled in (same order as three chained
        # sed calls).
        cat "$tdir/0.in" seq.tmp "$tdir/1.in" > test.in
        sed -e "s/resname/$resname/g" \
            -e "s/PDBF/$filename/g" \
            -e "s?ORAC_HOME?$odir?g" test.in > "$filename.in"

        # ligand first (with "     1 " rewritten to "     0 "), receptor after
        sed "s/     1 /     0 /g" lig-p.pdb > tmp.lig
        cat tmp.lig rec.pdb > "${filename}.pdb"
        cp lig-p.tpg "${filename}.tpg"
        cp lig-p.prm "${filename}.prm"
        echo "input files generated; running orac..."
    else
        echo ""
        echo  " *** ERROR " 
        echo  " primadorac generated ligand tpg/prm/pdb files not found. " 
        echo  " Save ligand coordinates from file " "$filename.pdb" " in a file named 'lig.pdb'  and launch primadorac as "
        echo  " $ORAC_HOME/tools/primadorac.bash -gdp lig.pdb"  
        echo ""
        exit 1
    fi
}

# A ligand residue name (-l) is mandatory. The expansion is quoted so the
# test stays well-formed whatever the value is, and the script reports the
# failure through a non-zero exit status.
if [[ -z "$lig" ]]; then
    echo " No ligand name specified. Issue the ligand name using opt -l " 
    echo " To find out ligname use the orac-tool check_pdb_code.bash"
    exit 1
fi

# isolate selected chain (protein only): keep the ATOM records whose chain
# identifier (column 22) equals the requested chain
awk -v c="$chain" '{if(substr($0,22,1)==c && $1 ~ "^ATOM") print}' "$filename.pdb" > chain.pdb

# fix broken residues (no gap fixing) using psf.pgn
# The former test `[ ! -z \`which vmd\` -a $v == 1 ]` collapsed into a
# malformed expression when vmd was absent and reached the else branch only
# through a `[` error; `command -v` gives a clean yes/no answer instead.
if command -v vmd > /dev/null 2>&1 && [[ "$v" == 1 ]]; then
    echo "fixing broken residues using vmd1.9"
    # psf.pgn is looked up on PATH and copied here so it can be patched
    cp "$(command -v psf.pgn)" .
    pathlib=$(command -v vmd | sed "s?bin/vmd??g")
    # point the script at the actual vmd installation prefix
    sed -i "s?/usr/local/?$pathlib?g" psf.pgn
    # NOTE(review): psf.pgn presumably reads chain.pdb and writes rec.pdb;
    # that cannot be confirmed from this file.
    vmd -dispdev text -e psf.pgn >& /dev/null
else
    echo "vmd not found or not used (-n option); using uncorrected PDB coordinates; Incomplete residues may be there... "
    mv chain.pdb rec.pdb
fi

# both paths rename HIS to HSE in the receptor file
sed -i "s/HIS/HSE/g" rec.pdb

# isolate selected ligand: records whose residue name (columns 18-20) is the
# requested ligand and whose chain id (column 22) is the selected chain or blank
awk -v c="$chain" -v l="$lig" '{if(( substr($0,22,1)==c || substr($0,22,1)==" " )&& substr($0,18,3)==l) print }' "$filename.pdb" > lig.pdb

if [[ ! -s lig.pdb ]]; then
    echo "Error: No ligand  lig.pdb  generated or empty file"
    echo "       check that the 3-chars ligand name in the PDB file is correct"
    exit 1
fi

echo "Launching primadorac.."
mkdir -p prima
# Without this guard a failed cd would make the following `cp ../lig.pdb .`
# and `cd ../` operate on the wrong directories.
cd prima || exit 1
cp ../lig.pdb .
primadorac.bash -gpn lig.pdb >& prima.out

# NOTE(review): success is detected through lig-min-p.pdb while the files
# actually used are lig-p.{pdb,prm,tpg}; confirm against primadorac.bash.
if [[ ! -f lig-min-p.pdb ]]; then
    echo "primadorac Failed. Output follows"
    cat prima.out
    exit 1
fi
cp lig-p.pdb ../
cp lig-p.prm ../
cp lig-p.tpg ../
cd ../ || exit 1


# Extract sequence (13 residues per line) and write the first ORAC input
seqgen 13

# Before running ORAC check whether the tpg path is too long.
# len = longest second field among the lines containing both READ_TPG_ASCII
# and "amber". It is always a single integer (0 when nothing matches), so
# the arithmetic tests below cannot fail on an empty or multi-line value.
len=$(awk '/READ_TPG_ASCII/ && /amber/ { n = length($2); if (n > max) max = n }
           END { print max + 0 }' "$filename.in")
len=${len:-0}

# NOTE(review): 56 is presumably the longest path ORAC accepts on that line.
if (( len > 56 )); then
    # shorten the path by writing the home directory as "~"
    sed -i  "s?$HOME?\~?g" "$filename.in"
fi

orac < "${filename}.in" > tmp.out

# check_orac_output.awk condenses the ORAC log into a short error tag
erro=$(check_orac_output.awk tmp.out)
echo " orac finished. Check for fixable errors"

if [[ "$erro" == "e13" ]]; then
    # rebuild the sequence block with 12 residues per line and retry
    echo " fixing wrong final residue (13-12).."
    seqgen 12
    if (( len > 56 )); then
        sed -i  "s?$HOME?\~?g" "$filename.in"
    fi
    orac < "${filename}.in" > tmp.out
    erro=$(check_orac_output.awk tmp.out)
fi

# erro is not modified between the two remaining checks, so at most one of
# these fixes is applied
case "$erro" in
    cysh)
        echo " fixing cys-> cysh and re-launching orac..."
        sed -i "s/cys/cysh/g" "${filename}.in"
        orac < "${filename}.in" > tmp.out
        ;;
    ot2)
        echo " fixing ot2 -> oc2 h and re-launching orac..."
        sed -i "s/ot2/oc2/g" "${filename}.in"
        sed -i "s/ot1/oc1/g" "${filename}.in"
        orac < "${filename}.in" > tmp.out
        ;;
esac

# Disulphide handling. check_disulphide.awk output is stored in reverse line
# order (tac) because each line is later inserted right below the #TPGCYS
# marker, which reverses the order once more.
check_disulphide.awk "${filename}.pdb" | tac > tmp.disul
nlines=$(wc -l tmp.disul | awk '{print $1}')

# NOTE(review): 0 or 2 lines are treated as "no bond found"; presumably the
# awk tool prints a two-line report in that case - confirm against the tool.
if [[ "$nlines" != "2" && "$nlines" != "0" ]]; then
    echo " cys-cys bonds found; updating the input accordingly... "

    # remove HG1 atoms by psfgen from pdb
    # Fields 5 and 6 of every 6-field line are residue ordinals. They are
    # printed blank-separated: the former "%5d%5d" glued two 5-digit numbers
    # into a single word. The awk filter also drops REMARK lines.
    for res in $(awk '{if(NF==6) printf " %d %d", $5,$6}' tmp.disul); do
        awk -v n="$res" '{if($1!="REMARK") {nres=substr($0,23,4); if(nres!=nres_old) {ires++; nres_old=nres}; if(ires==n && $3=="HG1") {found=1}  else {print} }}' "$filename.pdb" > tmp.pdb
        mv tmp.pdb "$filename.pdb"
    done

    # update seq.tmp: every cys becomes cysh, then the bonded ones (ordinal
    # minus one, as the first residue of the pdb is the ligand) go back to cys
    sed -i "s/cys/cysh/g" seq.tmp
    for res in $(awk '{if(NF==6) printf " %d %d", $5-1,$6-1}' tmp.disul); do
        awk -v n="$res" '{for(i=1; i<=NF; i++) { j++; if($i=="cysh" && j==n)  {print "cys"} else {print $i}}}' seq.tmp \
            | awk '{i++;printf "%6s", $1; if(i==10) {printf "\n"; i=0}}' > tmp.seq
        mv tmp.seq seq.tmp
    done

    # insert new sequence in input file
    # (the awk pass terminates the last line of seq.tmp with a newline)
    awk '{print $0}' seq.tmp > tmp.seq; mv tmp.seq seq.tmp
    cat "$tdir/0.in" seq.tmp "$tdir/1.in" \
        | sed "s/resname/$resname/g" | sed "s/PDBF/$filename/g" | sed "s?ORAC_HOME?$odir?g" > "$filename.in"

    # The input was rebuilt from the templates, so the "~" shortening of an
    # over-long tpg path done before the first ORAC run has to be redone.
    if [[ "${len:-0}" =~ ^[0-9]+$ ]] && (( len > 56 )); then
        sed -i  "s?$HOME?\~?g" "$filename.in"
    fi
    # NOTE(review): an earlier ot2->oc2 substitution is not re-applied here.

    # add one tmp.disul line per pass below #TPGCYS and re-run ORAC
    ncys=$(wc -l tmp.disul | awk '{print $1}')
    for ((i = 1; i <= ncys; i++)); do
        rm 1.pdb >& /dev/null
        linecys="$(head -n "$i" tmp.disul | tail -n 1)"
        sed -i "s/#TPGCYS/#TPGCYS\n$linecys/g" "${filename}.in"
        orac < "${filename}.in" > tmp.out
    done
fi

# Final verdict: a non-empty 1.pdb is taken as success.
if [[ -s 1.pdb ]]; then
    echo " Everything seems OK !! 1.pdb is not empty."
# The substitution is quoted: unquoted it turned into a `[` syntax error
# whenever grep returned no line or more than one line. The branch is still
# taken only when exactly one line of tmp.out contains "atom" and its first
# word is "atom".
elif [[ "$(grep atom tmp.out | awk '{print $1}')" == "atom" ]]; then
    echo "orac failed; testing whether first residue is zero in the PBD"
    # split the pdb at the first REMARK line: everything from it onwards goes
    # to prot.pdb, everything up to and including it goes to liga.pdb
    awk '{if($1=="REMARK") {ok=1}; if(ok==1) {print}}' "$filename.pdb" > prot.pdb
    awk '{print;  if($1=="REMARK") {exit }}' "$filename.pdb" > liga.pdb
    sed -i "s/  0  /  2  /g" prot.pdb
    cat liga.pdb prot.pdb > "$filename.pdb"
    orac < "$filename.in" > tmp.out
    if [[ -s 1.pdb ]]; then
        echo " Everything seems OK !! 1.pdb is not empty."
    else
        tail -20 tmp.out
        echo " Still something wrong; try to relaunch test_pdb_with_orac"
    fi
fi


# Optional cleanup (-r): delete every file of the working directory except
# the generated input files <pdbcode>.*
# A glob replaces the former `ls *` parsing, which split names on blanks and
# also listed the contents of sub-directories. Directories are left alone.
if [[ "$clean" == 1 ]]; then
    for entry in *; do
        [[ -d "$entry" ]] && continue
        if [[ "$entry" != "${filename}".* ]]; then # clean everything except input files
            rm -f -- "$entry"
        fi
    done
fi

