module pdb
  type :: label ! define new type label
     character*6 :: recrd
     integer :: serial
     character*4 :: name
     character*1 :: altloc
     character*3 :: resname
     character*1 :: chainid
     integer :: resseq
     character*1 :: icode
  end type label
  
contains
  
  subroutine open_pdb(unit_counter,unit_number,file_name,file_status,nato)
    ! open unit unit_number connected to the pdb file file_name
    ! count the number of atoms in the pdb 
    implicit none
    integer,intent(inout) :: unit_counter
    integer, intent(out) :: unit_number
    integer, intent(out) :: nato
    character*(*), intent(in) :: file_name
    character*3, intent(in) :: file_status
    integer :: err
    character*80 :: line
    
    unit_counter = unit_counter + 1
    
    unit_number = unit_counter
    
    select case(file_status)
    case("OLD")
       open(unit=unit_number,file=file_name,status='OLD',iostat=err)
       if(err /= 0) then 
          write(0,*) "error opening unit ", unit_number, "pdbfile ", file_name
          stop
       end if
       nato = 0
       read_loop: do 
          read(unit_number,'(A80)',iostat=err) line
          if(err < 0) exit read_loop
          select case(line(1:6))
          case("ATOM  ","HETATM") 
             nato = nato + 1
          case("END   ","TER   ","ENDMDL") 
             exit  read_loop
          case default 
             cycle read_loop
          end select
       enddo read_loop
       rewind(unit_number)
    case("NEW")
       open(unit=unit_number,file=file_name,status='NEW',iostat=err)
    case("REP")
       open(unit=unit_number,file=file_name,status='REPLACE',iostat=err)
    case("SCR")
       open(unit=unit_number,status='SCRATCH',iostat=err)
    case("UNK")
       open(unit=unit_number,file=file_name,status='UNKNOWN',iostat=err)
    end select
    
    if(nato == 0) then 
       write(0,*) "file ",file_name,"contains 0 atoms"
       stop
    end if
    
  end subroutine open_pdb
  
  
  subroutine read_pdb(nato,unit,r,pdblabel,endofpdb)
    implicit none
    integer,intent(in)     :: nato         ! number of atoms
    integer,intent(in)     :: unit         ! pdb unit
    real(4),intent(out)   :: r(:,:)       ! coordinates matrix
    type(label),intent(out):: pdblabel(:)  ! pdb label array
    logical,intent(out)    :: endofpdb          ! endofpdb
    ! local variables
    integer :: n            ! number of atoms
    integer            :: i,j
    integer            :: err       
    character*80           :: line
    
    endofpdb = .false.
    n = 0
    read_loop: do        
       read(unit,'(A80)',iostat=err) line
       if(err < 0) then 
          endofpdb = .true.
          exit read_loop
       end if
       select case(line(1:3))
       case("ATO","HET") 
          n=n+1
          read(line,1,iostat=err) pdblabel(n),(r(j,n),j=1,3)
          if(err > 0) then 
             write(0,*) 'error while reading line ',n,'of unit',unit
             stop
          end if
          pdblabel(n)%name = adjustl(pdblabel(n)%name)
       case("END","TER")  
          if(n == 0) cycle read_loop
          exit  read_loop
       case default 
          cycle read_loop
       end select
    enddo read_loop
    
1   format(a6,i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2)
    
  end subroutine read_pdb

  subroutine print_pdb(unit,r,n,pdblabel)
    implicit none
    integer,intent(in)     :: unit         ! pdb unit
    real(4),intent(in)    :: r(:,:)       ! coordinates matrix
    integer,intent(in)     :: n            ! number of atoms
    type(label),intent(in) :: pdblabel(:)  ! pdb label array
    ! local variables
    integer            :: i,j
    integer            :: err       

    do i = 1,n
       write(unit,1) pdblabel(i), (r(j,i),j=1,3)
    end do
    write(unit,'(a3)') 'END'
    
1   format(a6,i5,1x,a4,a1,a3,1x,a1,i4,a1,3x,3f8.3,2f6.2)
    
  end subroutine print_pdb

end module pdb

program qtcluster
  use pdb
  implicit none
  integer :: err
  integer :: i,j,k
  integer :: unit_counter=10 
  integer :: ucent
  integer :: t,t2
  real(4) :: time
  integer :: nd ! number of distances
  character(64) :: line
  character(64) :: arg
  real(4), allocatable :: d(:,:)
  integer :: nc ! number of clusters
  integer :: n,nmax
  integer :: tmax
  type :: cluster
     integer :: center
     integer :: np
  end type cluster
  type(cluster),allocatable :: cls(:)
  integer,allocatable :: ucl(:)
  integer :: dim
  !!!!!! values for these variables can be set from command line
  real(4) :: thr=-1.0
  logical :: noh=.false.
  character*64 :: selname=''
  integer :: selnum=0
  !!!!!! pdb variables 
  integer :: updb
  logical :: exist
  integer :: nato
  real(4),allocatable :: r(:,:)
  type(label),allocatable :: pdblab(:)
  logical :: endofpdb
  integer :: npdb
  integer :: nsub
  real(4),allocatable :: rsub(:,:)
  integer,allocatable :: aindex(:)
  integer,allocatable :: tag(:),tag0(:)
  integer,allocatable :: ct(:),nct(:)
  integer,allocatable :: nneigh(:)
  integer,allocatable :: uclu(:)
  character(64),allocatable :: fclu(:)
  character(64) :: frm
  integer,parameter :: ncmax=100 ! for printing

  ! syntax
  ! NAME
  !   qtcluster - cluster configurations from a pdb trajectory
  ! 
  ! SYNOPSIS
  !   qtcluster [OPTION]... -t <THRESHOLD> <FILE>
  !
  ! DESCRIPTION
  !   cluster molecular configurations from a pdb trajectory according to the quality threshold algorithm 
  !   using intra-molecular distances. by default, the program will use all atom-atom distances. 
  !
  ! -t, --threshold THRESHOLD (MANDATORY)
  !   define threshold (in angstrom) for QT algorithm
  !
  ! -noh, --nohydrogens
  !   works on heavy atoms
  !
  ! -n, --name LETTER
  !   works on atoms whose name begin with LETTER
  !
  ! -f, --file FILE
  !   reads in file FILE a subset of atoms
  ! 
  ! -h, --help
  !   print this man

  interface 
     elemental impure function aselect(noh,selname,name)
       implicit none
       logical :: aselect
       logical,intent(in) :: noh
       character*4,intent(in) :: name
       character*64,intent(in) :: selname
     end function aselect
  end interface

  n = command_argument_count()
  t = 0
  do 
     if(t == n) exit
     t = t + 1
     call get_command_argument(t,arg)
     select case(trim(arg))
     case('-t','--threshold')
        t = t + 1
        call get_command_argument(t,arg)
        read(arg,*) thr
        write(0,*) 'threshold :', thr
     case('-n','--name')
        do 
           t = t + 1
           call get_command_argument(t,arg)
           if(trim(arg(1:1)) == '-') then 
           else
              selname=arg
              selnum = len_trim(selname)
              write(0,*) 'name ', selname(1:selnum)
           end if
        end do
     case('-noh','--nohydrogens')
        noh=.true.
        write(0,*) 'hydrogen atoms are not used in the clustering'
     case('-h','--help')
        write(0,*) 'Synthax: qtcluster -t THRESHOLD [-noh] PDBFILE'
        write(0,*) '               THRESHOLD  is the min cut-off for clustering'
        write(0,*) '               PDBFILE is a traj file in PDB format        '
     write(0,*) '               -noh do clusters using heavy atoms only.    '
        stop
     case default
        inquire(FILE=trim(arg), EXIST=exist)
        if(exist) then 
           call  open_pdb(unit_counter,updb,trim(arg),'OLD',nato)
           write(0,*) 'pdb ', trim(arg)
           write(0,*) 'atoms ', nato
        else
           write(0,*) 'unknown option ', trim(arg)
           stop
        end if
     end select
  end do

  ! verify input
  if(t == 0) then 
     write(0,*) 'Synthax: qtcluster -t THRESHOLD [-noh] PDBFILE'
     write(0,*) '               THRESHOLD  is the min cut-off for clustering'
     write(0,*) '               PDBFILE is a traj file in PDB format        '
     write(0,*) '               -noh do clusters using heavy atoms only.    '
     stop
  end if
  if(thr < 0.0) then 
     write(0,*) 'threshold for QT must be set'
     stop
  end if
  if(selname(1:1) == 'H' .and. noh) then 
     write(0,*) '-noh and -n H* options are not compatible '
     stop
  end if
  if(selnum > 4) then 
     write(0,*) 'atomic name is too long ', selname
  end if

  ! allocate memory for pdb variables
  allocate(r(3,nato),pdblab(nato),stat=err)
  if(err /= 0) stop 'allocation failed for r,pdblab'

  ! fake read for npdb
  
  npdb = 0
  do 
     read(updb,*,iostat=err) line
     if(err < 0) exit
     if(adjustl(line) == 'TER' .or. adjustl(line) == 'END') npdb = npdb + 1
  end do
  write(0,*) 'npdb ',npdb

  rewind(updb)

  allocate(cls(npdb),stat=err)
  if(err /= 0) stop 'allocation failed for cls'
  allocate(tag(npdb),tag0(npdb),stat=err)
  if(err /= 0) stop 'allocation failed for tag'
  allocate(nct(npdb-1),nneigh(npdb),stat=err)
  if(err /= 0) stop 'allocation failed for nct,nneigh'


  readpdb: do t = 1,npdb
     call read_pdb(nato,updb,r,pdblab,endofpdb)
     ! exit at e.o.f
     if(endofpdb) exit readpdb
     
     ! first iteration, count and allocate memory for qt
     if(t == 1) then 
        nsub = count(aselect(noh,selname,pdblab%name))
        write(0,*) 'nsub', nsub
        nd = nsub * (nsub - 1) / 2
        allocate(aindex(nsub),rsub(3,nsub),d(nd,npdb),stat=err)
        if(err /= 0) stop 'allocation failed for aindex,rsub,d'
     end if
     
     ! compute aindex
     dim = 0
     do j = 1,nato
        if(aselect(noh,selname,pdblab(j)%name)) then 
           dim = dim + 1
           aindex(dim) = j
        end if
     end do
     
     ! compute subset of cartesian coordinates
     rsub = r(:,(/ aindex /))

     ! compute matrix of interatomic distances
     dim = 0
     do j = 1,nsub-1
        do k = j+1,nsub
           dim = dim + 1
           d(dim,t) = sqrt(sum((rsub(:,j)-rsub(:,k))**2))
        end do
     end do

     if(mod(t,100) == 0) write(0,*) 'npdb ', t
  end do readpdb
  
  !------------------------------------

  ! now compute rmsd matrix

  thr = thr**2
  write(0,*) 'computing neighbour list....'
  ! first count interactions
  nct = 0
  do t = 1,npdb-1
     if(mod(t,1000) == 0) write(0,*) t,' PDB files processed...' 
     do t2 = t+1,npdb
!        if(sum((d(:,t)-d(:,t2))**2)/real(nd) < thr) nct(t) = nct(t) + 1
        if(maxval((d(:,t)-d(:,t2))**2) < thr) nct(t) = nct(t) + 1
     end do
  end do
  write(0,*) 'Interactions counted. Allocating list array ... '
  ! then allocate ct list array
  allocate(ct(sum(nct)),stat=err)
  ! finally make ct array
  k = 0
  write(0,*) 'Allocate done. Finding all conts below threshold.. '
  do t = 1,npdb-1
     do t2 = t+1,npdb
        if(maxval((d(:,t)-d(:,t2))**2) < thr) then 
           k = k + 1
           ct(k) = t2
        end if
     end do
  end do


  ! now go for clusters

  write(0,*) 'Done! Now go for clusters...'
  tag = 0
  nc = 0
  qtloop: do 
     if(count(tag == 0) == 0) exit qtloop
     nc = nc + 1
     write(0,*) 'cl. n. ', nc
     ! find a new cluster with max population
     k = 0
     ! initialize nneigh with self-interaction
     nneigh = 0
     where(tag == 0) nneigh = 1
     do i = 1,npdb-1
        do j = 1,nct(i)
           k = k + 1
           if(tag(i) == 0 .and. tag(ct(k)) == 0) then 
              nneigh(i) = nneigh(i) + 1
              nneigh(ct(k)) = nneigh(ct(k)) + 1
           end if
        end do
     end do
     ! found the new cluster
     tmax = maxloc(nneigh,1)
     nmax = maxval(nneigh)

     
     ! store the new cluster info

     tag0 = tag
     k = 0
     do i = 1,npdb-1
        do j = 1,nct(i)
           k = k + 1
           if(tag0(i) == 0 .and. tag0(ct(k)) == 0) then 
              if(i == tmax .or. ct(k) == tmax) then 
                 tag(i) = nc
                 tag(ct(k)) = nc
              end if
           end if
        end do
     end do

     if(nneigh(tmax) == 1) tag(tmax) = nc

     cls(nc)%center = tmax
     cls(nc)%np = count(tag == nc)
  end do qtloop
  
  write(*,*) "numb. of clusters: ", nc
  write(*,'("cluster",6x,"pop",5x,"center")') 

  do j = 1,nc
     write(*,'(2x,I5,2I10)') j,cls(j)%np,cls(j)%center
  end do

  rewind(updb)
  unit_counter = unit_counter + 1
  ucent = unit_counter
  open(unit=ucent,file='centers.cl')
  allocate(uclu(nc),fclu(nc),stat=err)
  if(nc > ncmax) THEN 
     write(0,*) 'WARNING: program will print structures from the first 100 most populated clusters'
     nc = ncmax
  end if
  do j = 1,nc
     unit_counter = unit_counter + 1
     uclu(j) = unit_counter
     dim = j
     k = 1
     do
        if(dim < 10) exit
        dim = dim / 10
        k = k + 1
     end do
     write(frm,'(a5,i1,a1)') '(a1,i',k,')'
     write(line,trim(frm)) '.',j
     line = 'cluster'//trim(line)//'.pdb'
     fclu(j) = line
     open(unit=uclu(j),file=line)
  end do
  ! print out pdb 
  dim = 0
  do t = 1,npdb
     call read_pdb(nato,updb,r,pdblab,endofpdb)
     if(tag(t) <= ncmax) call print_pdb(uclu(tag(t)),r,nato,pdblab)
     do j = 1,nc
        if(t == cls(j)%center) then 
           dim = dim + 1
           write(ucent,*) 'REMARK', dim, 'clst ', j, 'pdb ', cls(j)%center, 'pop ', cls(j)%np
           call print_pdb(ucent,r,nato,pdblab)
        end if
     end do
  end do
  

  
end program qtcluster

elemental impure function aselect(noh,selname,name)
  implicit none
  logical :: aselect
  logical,intent(in) :: noh
  character*4,intent(in) :: name
  character*64,intent(in) :: selname
  integer :: selnum
  
  aselect = .true.
  selnum=len_trim(selname)
  if(selnum == 0) then 
     if(noh) aselect = name(1:1) /= 'H'
  else
     aselect = (name(1:selnum) == selname(1:selnum))
  end if
end function aselect

  
  
