#! /usr/bin/awk -f
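# Compute correlation metrics between two data sets (columns x and y):
# Pearson R, linear-fit slope/intercept, Kendall tau, Lin concordance,
# and the mean unsigned / mean signed errors.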
#
BEGIN {
    # ARGC counts the program name itself, so a value below 2 means that
    # no data file was given: print the usage text and stop.
    # Note that "exit" inside BEGIN still runs the END rule.
    if (ARGC < 2) {
        help = ""
        help = help "\n Compute the correlation metrics between two data set"
        help = help "\n "
        help = help "\n Syntax: r.awk data_file"
        help = help "\n The fle 'data_file' must have two columns x,y "
        help = help "\n "
        help = help "\n On output one line  with 8 field is produced:"
        help = help "\n 1:R(pearson) 2:a 3:b  4:MUE  5:tau 6:Lin 7:MSE  8:Npoints"
        help = help "\n "

        # The text starts with a newline, so the first piece is the empty
        # leading line of the usage message.
        nhelp = split(help, helpline, "\n")
        for (k = 1; k <= nhelp; k++) {
            print helpline[k]
        }
        exit
    }
}
# Main rule: store every (x,y) pair and accumulate the running sums.
#
# Records whose first field starts with "#" are comments, and records with
# fewer than two fields (blank lines, for instance) carry no (x,y) pair;
# both kinds are skipped.  The samples are counted with the dedicated
# counter n rather than NR: NR also counts the skipped records, which would
# bias every mean and leave empty (zero-valued) slots in x[] and y[].
{
    if (NF >= 2 && substr($1,1,1) != "#") {
        n++
        x[n] = $1; y[n] = $2
        xm += $1
        ym += $2
        d = $1 - $2
        mue += (d < 0 ? -d : d)     # unsigned error |x-y|
        me  += d                    # signed error x-y
    }
}

# END rule: derive the metrics and print one line with 8 fields:
#   1: Pearson correlation coefficient R
#   2: slope a and 3: intercept b of the least-squares line y = a*x + b
#   4: mean unsigned error, mean(|x-y|)
#   5: Kendall tau (pairs tied in x or y contribute 0)
#   6: Lin concordance correlation coefficient
#   7: mean signed error, mean(x-y)
#   8: number of data points
END {
    # "exit" in BEGIN (usage message) still runs END; leave without output.
    if (ARGC == 1) { exit }

    # Variances and tau are undefined for fewer than two points; report it
    # instead of aborting with a division by zero.
    if (n < 2) {
        print "r.awk: need at least two (x,y) data points, found " (n+0) > "/dev/stderr"
        exit 1
    }

    xm = xm/n
    ym = ym/n

    # Centered second moments.  Summing squared deviations from the mean
    # avoids the cancellation of <x^2> - <x>^2, which can turn slightly
    # negative through rounding and yield NaN under the square root.
    for (i = 1; i <= n; i++) {
        dx = x[i] - xm
        dy = y[i] - ym
        sxx += dx*dx
        syy += dy*dy
        r   += dx*dy
    }
    vx  = sxx/n
    vy  = syy/n
    cov = r/n
    sx  = sqrt(vx)
    sy  = sqrt(vy)

    # Pearson R is undefined when either column is constant; print 0 and
    # warn rather than dividing by zero.
    if (sx > 0 && sy > 0) {
        rxy = cov/(sx*sy)
    } else {
        rxy = 0
        print "r.awk: warning: zero variance in x or y, R set to 0" > "/dev/stderr"
    }

    # Least-squares slope: a = R*sy/sx = cov/var(x).
    if (vx > 0) {
        a = cov/vx
    } else {
        a = 0
    }
    b = ym - a*xm

    # Kendall tau: sum of sign(x_i-x_j)*sign(y_i-y_j) over all pairs,
    # normalised by the number of pairs n(n-1)/2.  The sign is evaluated
    # exactly, so small but non-zero differences count as +1 or -1.
    for (i = 1; i < n; i++) {
        for (j = i+1; j <= n; j++) {
            dx = x[i] - x[j]
            dy = y[i] - y[j]
            tau += ((dx > 0) - (dx < 0)) * ((dy > 0) - (dy < 0))
        }
    }
    tau = 2*tau/(n*(n-1))

    # rc is the Lin concordance correlation coefficient.
    den = vx + vy + (xm-ym)^2
    if (den > 0) {
        rc = 2*cov/den
    } else {
        rc = 0
    }

    printf "%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%5d\n", rxy, a, b, mue/n, tau, rc, me/n, n
}
