  program em 
! This subroutine implement the EM alogorithm assuming a mixture of normal distributions
! returning the Crooks based Free energy estimate as DG=-RT*log(sum c_i*exp[-(<w>_i -beta*s_i^2)/RT ] .
! See on this estimate DOI: 10.1063/1.4918558
! N.B.: work file units are assumed to be kJ/mole
#ifdef _INTEL_
    use ifport
#endif
    
    implicit none 
    integer i,k,kk,kk1,kk2,seedx,ip,nw,n,status,nmax,mmax,kmax,iter,itermax,ibin,binmin,binmax,unitd
    character(len=32) :: arg,workfile,dist1,dist2,dist3,dist4,dist5 
    real*8    ::  frac,work,dummy,a,a2,pi,beta,dg,dgk,dgkmax,gi,logl,loglold,dlog,smax,eps,resol,pw,wmax,wmin,norm_x,norm1,norm2,dpw
    real*8 :: sigma
    real*8, allocatable :: wrk(:),wrkr(:),am(:),wm(:),sm(:),gnorm(:),nk(:),w(:,:),normw(:),histg(:)
#ifndef _INTEL_
    real*8  :: rand
#endif
    
! set defaults
    frac=-1.d0   ! bootstrap fraction (default is no bootstrap)
    status=0     ! IO status
    nw=-1        ! number of bootstrapped points out of nw
    MMAX=3
    eps=0.000001
    itermax=5000
! set constants
    beta=1.d0/0.593
    pi=dacos(-1.d0)

! check args number 
    if(iargc().ne.1.and.iargc().ne.3) THEN 
       write(6,*) 
       write(6,*) "Syntax: "
       write(6,*) "        em [frac seedx ] workfile"
       write(6,*) "        workfile is the work file"
       write(6,*) "           frac/seedx are optional arguments:"
       write(6,*) "           frac -> is the bootstrap fraction "
       write(6,*) "           seedx -> is the seed for random numbers"
       write(6,*) 
       write(6,*) "           if frac and seedx are not given, do a calculation on the sample"
       write(6,*) "           if 0 < frac < 1 do boostrap with resampling on a smaller sample using seedx as seed"
       write(6,*) "           if frac>=1 do bootstrap with resampling on the sample of the same size using seedx as seed"
       write(6,*) 
       go to 2
    end if

    !   parse arguments (either 1 or 3)
    if(iargc().eq.1) THEN  
       call getarg(1,workfile)  ! only the workfile is given
    else 
       call getarg(1,arg)       ! seedx and botstrap fraction are also given 
       read(arg,*) frac
       call getarg(2,arg)
       read(arg,*) seedx
       call getarg(3,workfile)
    end if
    
!   sweep the work file  and file nw    
    open(unit=10,file=workfile) 
    do while(status==0) 
       read(10,*,IOSTAT=status) 
       nw=nw+1
    end do

!   allocate all arrays 
    allocate(wrk(nw),normw(nw),am(mmax),wm(mmax),sm(mmax),gnorm(mmax),w(nw,mmax),nk(mmax),stat=status) 
    if(frac.ge.1.0d0) allocate(wrkr(nw),stat=status)

!   read work array 
    if(frac.gt.0) call srand(seedx) 
    dummy=rand()
    rewind(10)
    n=0
    wmax=-5000.d0
    wmin= 5000.d0
    if(frac.gt.0.d0.and.frac.lt.1.d0) THEN  !do bootstrap with resampling on a smalle sample
       do i=1,nw
          read(10,*) work
          if(rand().lt.frac) THEN
             n=n+1
             wrk(n)=work/4.184
             if(work.lt.wmin) wmin=work
             if(work.gt.wmax) wmax=work
          endif
       end do
    else if(frac.ge.1.d0) THEN! if frac>=1 do bootstrap with resampling on a sample of the same size
       n=nw   
       do i=1,n
          read(10,*) wrkr(i)
          if(wrkr(i).lt.wmin) wmin=wrkr(i)
          if(wrkr(i).gt.wmax) wmax=wrkr(i)
       end do
       do i=1,n
          ip=int(rand()*n)+1
          if(ip.gt.n) ip=n
          wrk(i)=wrkr(ip)/4.184  
       end do
    else ! No boostrapping do a plain calculation on the full sample.
       n=nw   
       do i=1,n
          read(10,*) work
          wrk(i)=work/4.184
          if(work.lt.wmin) wmin=work
          if(work.gt.wmax) wmax=work
       end do
    end if

    wmin=wmin/4.184
    wmax=wmax/4.184
    resol=(wmax-wmin)/100.d0
    kk1=int(wmin/resol) - 2      
    kk2=int(wmax/resol) + 2 
    allocate(histg(kk1:kk2), stat=status)  

!   open units for distribution files
! open units for distribution files
    do i=1,mmax
       WRITE (dist1,'(a,i1)') "wdist",i
       dist2=trim(dist1)//"."//adjustl(workfile)
       open(unit=10+i,file=dist2)
    end do
! n is the effective number of (bootstrapped) points used
    a=0.d0
    a2=0.d0
    do i=1,n
       a=a+wrk(i) 
       a2=a2+wrk(i)**2
    end do
    a=a/float(n)
    a2=a2/float(n)-a**2
    sigma=sqrt(a2)
!   initial guess with nmax=1 (Single Gaussian component)
    nmax=1

    wm(1)=a
    sm(1)=a2
    am(1)=1.d0

!   implement the EM iteration for 
    
!   computes the membership weights (E-step)

1   continue
    dlog=1000.d0
    iter=0
    do while (abs(dlog).gt.eps.AND.iter.lt.itermax) 
       normw=0.d0    ! normalization weight array set to zero 
       do k=1,nmax
          if(sm(k).gt.0.00001d0) THEN  
             gnorm(k)=1.d0/dsqrt(2.d0*pi*sm(k))
          else
             ! no solution with current nmax and n 
             nmax=nmax+1
             if(nmax.ge.mmax) go to 2
             go to 1
             gnorm(k)=1.d0/dsqrt(2.d0*pi*0.00001d0)
          end if
       end do
       do i=1,n
          do k=1,nmax
             normw(i)=normw(i)+am(k)*gnorm(k)*exp(-(wrk(i)-wm(k))**2/(2.d0*sm(k)))  
          end do
       end do

       do k=1,nmax
          nk(k)=0.d0
          do i=1,n
             w(i,k)=am(k)*gnorm(k)*exp(-(wrk(i)-wm(k))**2/(2.d0*sm(k)))/normw(i)
             nk(k)=nk(k)+w(i,k) 
          end do
       end do
       !  new values of the moments (M-step )
       do k=1,nmax
          am(k)=nk(k)/float(n)
          wm(k)=0.d0
          do i=1,n
             wm(k)=wm(k)+w(i,k)*wrk(i)
          end do
          wm(k)=wm(k)/nk(k)
          sm(k)=0.d0
          do i=1,n 
             sm(k)=sm(k)+w(i,k)*(wrk(i)-wm(k))**2 
          end do
          sm(k)=sm(k)/nk(k)
          if(sm(k).gt.0.00001d0) THEN  
             gnorm(k)=1.d0/dsqrt(2.d0*pi*sm(k))
          else
             ! no solution with current nmax and n 
             nmax=nmax+1
             if(nmax.gt.mmax) go to 2
             go to 1
             gnorm(k)=1.d0/dsqrt(2.d0*pi*0.00001d0)
          end if
       end do
       !  computes log likelihood 
       loglold=logl
       logl=0.d0

       do i=1,n
          gi=0.d0
          do k=1,nmax
             gi=gi+am(k)*gnorm(k)*exp(-(wrk(i)-wm(k))**2/(2.d0*sm(k)))
          end do
          logl=logl+log(gi) 
       end do
       iter=iter+1
       dlog=logl-loglold
    end do
    
!   compute free energy using EM mixture with NMAX components    
    gi=0.d0
    smax=0.d0
    unitd=10+nmax
!   find maximum dg_i component    
    dgkmax=-1000.d0 
    do k=1,nmax
       dgk=wm(k)-0.5*beta*sm(k)
       if(dgk.gt.dgkmax) THEN
          dgkmax=dgk
       end if
    end do

    do k=1,nmax
       dgk=wm(k)-0.5*beta*sm(k) - dgkmax
       if(sm(k).gt.smax) THEN
          smax=sm(k)
          kmax=k
       end if
       gi=gi+am(k)*exp(-beta*dgk)
    end do
    dg = dgkmax -log(gi)/beta

    write(unitd,'("# DG =",f10.2, " Components= ",I2, " LOGLK = ", G15.6, " npoints =",i6 , " iter=",i6)') dg, nmax,logl,n,iter
    write(6,'("# DG =",f10.2, " Components= ",I2, " LOGLK = ", G15.6, " npoints =",i6, " iter=",i6)') dg, nmax,logl,n,iter

    histg=0.d0
    binmax=-5000
    binmin= 5000
    do i=1,n 
       ibin=int(wrk(i)/resol)+1
       histg(ibin)=histg(ibin)+1.d0
       if(ibin.gt.binmax) binmax=ibin
       if(ibin.lt.binmin) binmin=ibin
    end do

    histg=histg/(float(n)*resol)

    norm_x=0.d0
    norm1=0.d0
    norm2=0.d0
    dpw=0.d0
    do i=binmin,binmax
       work=i*resol
       pw=0
       do k=1,nmax
          pw=pw+am(k)*gnorm(k)*exp(-(work-wm(k))**2/(2.d0*sm(k)))
       end do
       norm1=norm1+pw*resol
       norm2=norm2+histg(i)*resol
       dpw=dpw+(pw-histg(i))**2
       if(pw.gt.(0.1d0/dfloat(n))) THEN 
          write(unitd,'(F12.4,2f12.6)') work,pw,histg(i)
       ENDIF
    end do
    do k=1,nmax
       norm_x=norm_x+am(k)
    end do
    write(unitd,'("# DPW =",f15.7)') dpw
    do k=1,nmax
       write(unitd,'("#", I2, " c_coeff = ", f10.6," moments =", 2f15.3, " dgk =",f10.3)') &
            k,am(k),wm(k),dsqrt(sm(k)), wm(k)-0.5*beta*sm(k)
    end do
    
    if(nmax.eq.mmax) then 
       write(6,'(A)') "# program ended; free energy estimate is that with maximum LOGLK" 
    else
!      equal splitting for the next guess
       nmax=nmax+1
       do k=1,nmax
          am(k)=1.d0/float(nmax)
          wm(k)=a-sigma+2*sigma*(k-1)/float(nmax-1)
          sm(k)=a2/float(nmax)
!         write(6,'(4F8.3)')  k,am(k),wm(k),sm(k)
       end do
       go to 1   ! increment number of gaussian components
    endif
2   continue
    
 end program em
  
