#! /usr/bin/awk -f
# histog - Make histogram from 1-st number in each input line ("+h" for help)
#-----------------------------------------------------------------------------
# Global variables
#   dx     channel width
#   n      channel number
#   h[n]   number of items in channel n (or total weight for this channel)
#   nmax   highest n found so far (highest populated channel)
#   nmin   lowest n found so far (lowest populated channel)
#   Omax   option: true if we must normalize to maximum equal to 1
#   Oint   option: true if we must normalize to integral equal to 1
#   Owgt   option: true if 2-nd number in each line must be used as a weight

# Local variables in the END code
#   area   total area or total h[n], is an implicit function of n
#   hnorm  histogram normalization: maximum, total integral, or 1 (default)
#   anorm  area normalization: true area or plain sum (default)

#-----------------------------------------------------------------------------
# Get option, if any, check for valid options, parse options (see usage).
# getopt() sets the global flags Omax, Oint, Owgt and returns the channel
# width, which is kept in dx.
BEGIN {
  dx = getopt("+")
}

# Skip empty lines and lines not starting with numbers. After optional
# blanks a number must show an optional sign, an optional decimal point and
# then a digit. A bare "-" or "." is not enough: ruler lines like "-----"
# must be ignored, not counted as value 0.
NF == 0                      { next }
$0 !~ /^[ \t]*[-+]?\.?[0-9]/ { next }

# Find appropriate channel and add proper weight to it, update nmin and nmax.
# nmin and nmax stay unset ("") until the first accepted line arrives.
{
  n = nint($1/dx)
  h[n] += ( Owgt ? $2 : 1 )
  if( nmin=="" || n<nmin ){ nmin = n }
  if( nmax=="" || n>nmax ){ nmax = n }
}

# Find hnorm as requested by command line option, then print each populated
# channel and its immediate neighbours (an empty channel next to a populated
# one is printed too). Output columns: channel position n*dx, normalized
# content h[n]/hnorm, normalized running total area/anorm.
END  {
       # Do nothing when no data line was accepted. This is also the path
       # taken after usage() calls exit from BEGIN: awk still runs END rules
       # then, and dx is still unset (0), so the Oint division below would
       # abort with a division-by-zero error and clobber the exit status.
       if( nmin != "" ){
         # NOTE: when both Omax and Oint are set, hnorm accumulates both
         # contributions in the same variable; that combination is kept as is.
         for(n=nmin;n<=nmax;n++){
           if( Omax && h[n]>hnorm ){ hnorm = h[n] }
           if( Oint               ){ hnorm = hnorm + h[n]*dx }
         }
         # Fall back to "no normalization" when the norm is not positive
         if( hnorm<=0             ){ hnorm = 1 }
         # With Oint, hnorm is sum*dx; dividing by dx leaves the plain sum
         anorm = hnorm / ( Oint ? dx : 1 )
         for(n=nmin;n<=nmax;n++){
           area = area + h[n]
           if( h[n-1] || h[n] || h[n+1] ){ print n*dx,h[n]/hnorm,area/anorm }
         }
       }
     }

#-----------------------------------------------------------------------------
# nint(x) - return the integer nearest to x; exact halves are rounded away
#           from zero (0.5 -> 1, -0.5 -> -1)
function nint(x)
{
  if( x>=0 ){
    return int(x+0.5)
  }
  return int(x-0.5)
}

#-----------------------------------------------------------------------------
# getopt(opt) - Get or default options, check and parse the options
#   opt      default option string, used when ARGV[1] does not start with "+"
#   returns  the channel width: the numeric value left in the option string
#            once the flag letters are removed, or 1 if that is not positive
#   Sets the globals Omax, Oint and Owgt to the number of m/i/w letters found
#   (zero means the option is off). Calls usage() on anything unexpected.
function getopt(opt)
{
  if( ARGV[1] ~ "^\\+" ){
    opt = ARGV[1]
    # Take the option out of the file list; when it was the only argument,
    # put "-" in its place so that standard input is read.
    if( ARGC==2 ){
      ARGV[1] = "-"
    } else {
      ARGV[1] = ""
    }
  }

  # Strip the flag letters; gsub returns how many were removed
  Omax = gsub("[mM]","",opt)
  Oint = gsub("[iI]","",opt)
  Owgt = gsub("[wW]","",opt)

  # What remains must be "+" followed by nothing but number characters
  if( opt !~ "^\\+[0-9.eE+-]*$" ){
    usage()
  }

  if( opt+0>0 ){
    return opt+0
  }
  return 1
}

#-----------------------------------------------------------------------------
# usage() - Display usage message and exit with error code 1
#   The text goes out in a single print; lines are joined with ORS so the
#   output matches one print statement per line.
function usage()
{
  print "histog - Make histogram from 1-st number in each input line" ORS \
        ORS \
        "Usage:   histog [ +Options ] [ File ... ]" ORS \
        ORS \
        "Options: m      normalize to maximum equal to 1" ORS \
        "         i      normalize to integral equal to 1" ORS \
        "         w      use 2-nd number as a weight" ORS \
        "         Width  channel width for the histogram" ORS \
        ORS \
        "Empty lines and lines starting with non numeric characters are" ORS \
        "silently ignored. The defaults are: no normalization, width 1." ORS \
        ORS \
        "The output contains: channel, count, total count or integral."

  exit 1
}

