#!/bin/bash
# Driver around pdbrestore.bash: processes one PDB code, a list of codes (-l)
# or a random sample of the PDB (-s) and accumulates statistics in *.dat files.

# NOTE(review): presumably set so the numeric tools used below print '.' as
# the decimal separator regardless of the user's locale -- confirm.
export LC_NUMERIC="en_US.UTF-8"

# Locate the orac executable and derive the installation root by stripping
# the trailing "bin/orac" from its path.  `command -v` prints nothing when
# orac is not on PATH, in which case both variables are empty.
orac_bin=$(command -v orac)
orac_home=${orac_bin%bin/orac}
chain_gap="NULL"   # NOTE(review): no reader of this variable is visible here

# Run-mode state, updated while the command-line options are parsed.
nsample=0    # value given to -s
sample=0     # 1 when -s was given
readlist=0   # 1 when -l was given

# Start from a clean slate.  WARNING: this removes every *.dat file in the
# current directory, not only the ones this script appends to.  -f keeps rm
# from waiting on a prompt that the redirection would hide.
rm -f -- *.dat > /dev/null 2>&1
#######################################
# Run pdbrestore.bash on one PDB entry and append statistics about its
# output to the accumulating *.dat files in the current directory.
# Arguments:
#   $1 - PDB identifier handed to `pdbrestore.bash -s`
# Outputs:
#   stdout          the identifier
#   $1.OUT          stdout of pdbrestore.bash (its stderr is discarded)
#   nchains.dat     appended: "<number of ' chain ' lines> <id>"
#   chain_lengh.dat appended: "<field 3 of each ' chain ' line> <id>"
#   gaps.dat        appended: rows produced from the GAP lines (see below)
#   tmp.gap         overwritten: GAP lines of $1.OUT not containing SECTION
# The file $1.pdb is removed if present.
#######################################
function testpdb {
    # Locals, so the caller's variables of the same name are left untouched.
    local i=$1
    local nchains

    echo "$i"
    pdbrestore.bash -s "$i" 1> "$i.OUT" 2> /dev/null
    rm -f -- "$i.pdb" > /dev/null 2>&1

    nchains=$(grep -c -- " chain " "$i.OUT")
    nchains=${nchains:-0}   # grep prints nothing if the .OUT file is unreadable
    printf '%s %s\n' "$nchains" "$i" >> nchains.dat
    awk -v pdb="$i" '/ chain /{print $3,pdb}' < "$i.OUT" >> chain_lengh.dat

    grep -- GAP "$i.OUT" | grep -v SECTION > tmp.gap
    if [ -s tmp.gap ]; then
        # NOTE(review): awk program kept verbatim.  It appears to print, per
        # value of field 3, the number of GAP records and the mean of field 8,
        # then pad with "0 0" rows up to nchains -- confirm against the
        # pdbrestore.bash output format before relying on this description.
        awk -v nc="$nchains" -v pdb="$i" 'BEGIN{old=0}{if(n[$3]<n[old])  {c++;print n[old],l[old]/n[old],pdb}; old=$3; n[$3]++; l[$3]+=$8} END{if(n[$3]!=old) {c=c+1;print n[$3],l[$3]/n[$3],pdb}; if(nc>c){for(i=1; i<=nc-c; i++){print "0 0 ",pdb}}}' tmp.gap >> gaps.dat
    else
        # No gap records: one " 0 0 <id>" row per chain.
        awk -v nc="$nchains" -v pdb="$i" 'BEGIN{for(i=1; i<=nc; i++) {print " 0 0",pdb }}' /dev/null >> gaps.dat
    fi
}
# Results/ receives the list of processed PDB codes; create it on first use.
[[ -d Results ]] || mkdir Results

#######################################
# Parse the command-line options.
# Globals written:
#   nsample, sample   - set by "-s num"      (sample becomes 1)
#   list, readlist    - set by "-l filename" (readlist becomes 1)
# Arguments:
#   the script's command-line arguments
# Returns:
#   0 on success; terminates the script with status 2 on an unknown option
#   or on an option whose argument is missing.
#######################################
parse_options() {
    local opt
    local OPTIND=1   # fresh getopts state on every call

    # The leading ':' selects silent error reporting: a missing argument is
    # delivered as opt=':' and an unknown option as opt='?', both with the
    # offending option letter in OPTARG.
    while getopts ":s:l:" opt; do
        case $opt in
            s)
                nsample=$OPTARG
                sample=1
                ;;
            l)
                list=$OPTARG
                readlist=1
                ;;
            :)
                echo "option -$OPTARG requires an argument" >&2
                exit 2
                ;;
            \?)
                echo "invalid option: -$OPTARG" >&2
                exit 2
                ;;
        esac
    done
}
parse_options "$@"
# Called without any argument: show the help text and stop.
if [[ $# -eq 0 ]]; then
    printf '%s\n' \
        "This code process/fix PDB files from the PDB using pdbrestore producing essential info (OK/failed) " \
        " " \
        " Syntax: test_pdbrestore [opt] [LIST|PDBCODE]" \
        "         PDBCODE is any four-alphanumeric pdb code on the PDB" \
        " Options:                                                                                 " \
        "        -s num " \
        "          pick randomly num PDB files from the protein data bank" \
        "        -l filename " \
        "          read the PDB files from the file 'filename' " \
        " Examples: " \
        "          test_pdbrestore.bash 1fkg " \
        "             download and process 1fkg " \
        "          test_pdbrestore.bash -l Results/LIST.DATA1  " \
        "             process all pdb files listed in Results/LIST.DATA1 " \
        "          test_pdbrestore.bash -s 2000" \
        "             download and process ~2000 files from the PDB"
    exit
fi


# Succeeds when the argument list is exactly one word that is not an option.
# Attached option forms such as "-lFILE" or "-s2000", a bare "-s" and unknown
# options like "-x" are a single word too and must not be taken for a PDB code.
is_single_pdb_request() {
    [[ $# -eq 1 && $1 != -* ]]
}

# Single-entry mode: one bare PDB code and neither -s nor -l in effect.
if is_single_pdb_request "$@" && [[ $sample == 0 && $readlist == 0 ]]; then
    testpdb "$1"
fi

#######################################
# Print a random subset of the PDB codes found in a listing file.
# Each line's code is characters 4-7 of its 9th whitespace-separated field;
# every code is kept independently with probability wanted/total, where
# total is the number of lines in the file, so about `wanted` codes result.
# The selection is done in a single awk pass seeded once.
# Arguments:
#   $1 - path of the listing file
#   $2 - approximate number of codes wanted
# Outputs:
#   selected codes on stdout, one per line, in file order
# Returns:
#   1 (with a message on stderr) if the file is unreadable or empty
#######################################
sample_pdb_codes() {
    local list_file=$1
    local wanted=$2
    local total seed

    if [[ ! -r $list_file ]]; then
        echo "PDB list $list_file not found or unreadable" >&2
        return 1
    fi
    total=$(wc -l < "$list_file")
    if (( total <= 0 )); then
        echo "PDB list $list_file is empty" >&2
        return 1
    fi

    # Nanoseconds of the current time as seed; fall back to $RANDOM where
    # `date` does not support %N.
    seed=$(date +%N 2> /dev/null)
    [[ $seed =~ ^[0-9]+$ ]] || seed=$RANDOM

    awk -v total="$total" -v wanted="$wanted" -v seed="$seed" '
        BEGIN { srand(seed); p = wanted / total }
        {
            code = substr($9, 4, 4)
            if (code != "" && rand() < p) print code
        }' "$list_file"
}

# Reset the record of processed entries; both -s and -l runs append to it.
rm -f -- Results/PDB_files > /dev/null 2>&1

if [[ ${sample:-0} == 1 ]]; then
    # Word splitting of the helper output is intended: one code per word.
    for i in $(sample_pdb_codes "$orac_home/pdb/PDB_list_16_01_2025" "$nsample"); do
        testpdb "$i"
        echo "$i" >> Results/PDB_files
    done
fi
#######################################
# Process every PDB code listed in a file.
# The file is split on whitespace, so codes may be separated by blanks or
# newlines.  Each code is passed to testpdb and then appended to
# Results/PDB_files.
# Arguments:
#   $1 - path of the list file
# Returns:
#   1 (with a message on stderr) if $1 is empty or not a regular file
#######################################
process_pdb_list() {
    local list_file=$1
    local i

    # Quoted test: an empty or space-containing name is handled correctly
    # instead of slipping through and leaving `cat` reading from stdin.
    if [[ ! -f $list_file ]]; then
        echo " file $list_file non found" >&2
        return 1
    fi

    # Word splitting of the file content is intended: one code per word.
    for i in $(< "$list_file"); do
        testpdb "$i"
        echo "$i" >> Results/PDB_files
    done
}

if [[ ${readlist:-0} == 1 ]]; then
    process_pdb_list "$list"
fi

# End of script. No explicit code is given, so the exit status is that of
# the last command executed.
exit
