#!/bin/bash
#  Download a PDB file from the PDB, find the gaps, hetero atoms, metals and
#  chains in that file, and print out the relevant info.
#
# syntax:
#   ./find_gap.bash [-f] pdbcode
#    "pdbcode" is the alphanumeric code of the PDB file.
#    If the -f option is used, nothing is fetched with wget from the PDB;
#    the file with pdbcode=xxxx [i.e. pdbxxxx.ent] is assumed to be local.
#

# Command-line switches.  -f and -l are synonyms: either one marks the input
# as already present locally.  An unknown switch only produces a notice on
# stdout; parsing then carries on.
local=0
while getopts ":fl" opt; do
    case "$opt" in
        f|l) local=1 ;;
        "?") echo "invalid option" ;;
    esac
done
# Print the usage text and stop when there is no positional argument: either
# nothing was passed at all, or getopts consumed every argument as an option
# (OPTIND then points past the last argument, e.g. for a lone "-f").
# The exit status stays 0, as it was before.
if (( $# == 0 || OPTIND > $# )); then
    echo "This code gets a PDB file from the PDB and generates essential info (e.g. ligand name, chains etc) "
    echo " "
    echo " Syntax: ${0##*/} [-f] PDBCODE "
    echo "         where PDBCODE is any four-alphanumeric pdb code on the PDB"
    echo "         with -f nothing is downloaded; PDBCODE is then a local file ending in .pdb"
    exit 0
fi

# The entry to process is always the last command-line argument.
code=${!#}

# Make ${code}.pdb available in the current directory.
#   default : download the gzipped entry from the EBI mirror, unpack it and
#             rename pdb<code>.ent to <code>.pdb
#   -f / -l : "code" holds the path of a local file whose name must end in
#             ".pdb"; the file is moved into the current directory and "code"
#             is reduced to the file name without directory and extension
# Any failure aborts the script with status 1 instead of letting the later
# sections run on a missing file.
if [[ "${local:-0}" != 1 ]]; then
    if ! wget "ftp://ftp.ebi.ac.uk/pub/databases/pdb/data/structures/all/pdb/pdb${code}.ent.gz"; then
        echo "download of pdb${code}.ent.gz failed" >&2
        exit 1
    fi
    if ! gzip -d "pdb${code}.ent.gz" || ! mv -- "pdb${code}.ent" "${code}.pdb"; then
        echo "could not unpack pdb${code}.ent.gz" >&2
        exit 1
    fi
else
    # Extension check done with a shell pattern; no awk needed.
    if [[ "$code" != *.pdb ]]; then
        echo "Not a pdb file" >&2
        exit 1
    fi
    if [[ ! -f "$code" ]]; then
        echo "${code}: no such file" >&2
        exit 1
    fi
    pdbfile=${code##*/}
    # mv refuses to move a file onto itself, so only move when the file is
    # not already the one in the current directory.
    if [[ ! "$code" -ef "./${pdbfile}" ]]; then
        mv -- "$code" . || exit 1
    fi
    # From here on "code" is the bare name, so ${code}.pdb is the local copy.
    code=${pdbfile%.pdb}
fi

# scanning for general info
#######################################
# Print the general-information lines of one PDB file.
# Arguments: $1 - path of the PDB file
# Outputs:   "    NRESIDUE <n>", n being the number of ATOM records that
#            contain " CA "; then every TITLE record; then every line that
#            contains "DOI".  Record lines are prefixed with four spaces.
#######################################
print_general_info() {
    awk '
        # Only lines that start with "ATOM " are atom records; a REMARK that
        # merely mentions "ATOM " must not be counted.
        /^ATOM / && / CA / { nres++ }
        /^TITLE/           { title[++nt] = $0 }
        # DOI is matched anywhere in the line, as before.
        /DOI/              { doi[++nd] = $0 }
        END {
            printf "    NRESIDUE %d\n", nres
            for (i = 1; i <= nt; i++) print "   ", title[i]
            for (i = 1; i <= nd; i++) print "   ", doi[i]
        }
    ' "$1"
}

# This section starts the report, so the output file is truncated here.
{
    echo ""
    echo " == GENERAL SECTION  "
    print_general_info "${code}.pdb"
} > "${code}.OUT"
 
# scan for chains info
#######################################
# Print the number of ATOM records found for each chain of a PDB file.
# Arguments: $1 - path of the PDB file
# Outputs:   one line "    chain <id> <count>" for every chain ID that has
#            ATOM records; upper-case IDs first, then lower-case, then digits.
#######################################
count_chain_atoms() {
    awk '
        # Column 22 of an ATOM record holds the chain ID.
        $1 == "ATOM" { natoms[substr($0, 22, 1)]++ }
        END {
            ids = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
            for (i = 1; i <= length(ids); i++) {
                id = substr(ids, i, 1)
                # Each chain reports its own counter.
                if (id in natoms) print "    chain " id, natoms[id]
            }
        }
    ' "$1"
}

{
    echo ""
    echo " == CHAIN SECTION "
    count_chain_atoms "${code}.pdb"
} >> "${code}.OUT"



# scanning for gaps
#######################################
# Report jumps in residue numbering along the CA trace of a PDB file.
# Arguments: $1 - path of the PDB file
# Outputs:   one line per jump larger than 1 between two consecutive CA
#            records of the same chain:
#            <prev resnum><prev resname><prev chain><resnum><resname><chain>  GAP<difference>
#######################################
report_gaps() {
    awk '
        /^ATOM / && / CA / {
            resnum  = substr($0, 23, 4)
            resname = substr($0, 18, 3)
            chain   = substr($0, 22, 1)
            # Compare only when a previous CA exists and lies in the same
            # chain; the first record of a chain has nothing to be compared
            # with, and differences across chains carry no meaning.
            if (seen && chain == prev_chain) {
                jump = resnum - prev_resnum
                if (jump > 1)
                    printf "%5d%5s%2s%5d%5s%2s  GAP%4d\n", prev_resnum, prev_resname, prev_chain, resnum, resname, chain, jump
            }
            seen = 1
            prev_resnum  = resnum
            prev_resname = resname
            prev_chain   = chain
        }
    ' "$1"
}

{
    echo ""
    echo " == GAP SECTION  "
    report_gaps "${code}.pdb"
} >> "${code}.OUT"

# search for hetero and metals
# hetero.awk is an external command looked up through PATH; whatever it
# prints for the PDB file is appended to the report unchanged.
{
    echo ""
    echo " == HETERO ATOMS SECTION "
    hetero.awk "${code}.pdb"
} >> "${code}.OUT"

# Show the assembled report on stdout.
cat -- "${code}.OUT"


