//   -*- C++ -*-
/*****************************************************************************
 *
 *   |_|_|_  |_|_    |_    |_|_|_  |_		     C O M M U N I C A T I O N
 * |_        |_  |_  |_  |_        |_		               N E T W O R K S
 * |_        |_  |_  |_  |_        |_		                     C L A S S
 *   |_|_|_  |_    |_|_    |_|_|_  |_|_|_|_	                 L I B R A R Y
 *
 * $Id: BatchMeans.c,v 1.4 1996-08-07 18:02:42+02 steppler Exp $
 *
 * Class: CNBatchMeans --- Batch Means Evaluation
 *
 *****************************************************************************
 * Copyright (C) 1992-1996   Communication Networks
 *                           Aachen University of Technology
 *                           D-52056 Aachen
 *                           Germany
 *                           Email: cncl-adm@comnets.rwth-aachen.de
 *****************************************************************************
 * This file is part of the CN class library. All files marked with
 * this header are free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.  This library is
 * distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
 * License for more details.  You should have received a copy of the GNU
 * Library General Public License along with this library; if not, write
 * to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 * 
 * As an exception to this rule you may use this template to generate
 * your own classes. This does not cause these classes to be covered by
 * the GNU Library General Public License. This exception does not
 * however invalidate any other reasons why the resulting program must be
 * covered by the GNU Library General Public License.
 *****************************************************************************/

#include <CNCL/Class.h>

#include "BatchMeans.h"


/*
  Note: Formulae used in this implementation (esp. Bayes Error of Mean
        and Variance) are based on:
	F.Schreiber, Improved Simulation by Application of the Objective
	Bayes-Statistics, AEUE Bd 34, pp 243-249, 1980
*/


// constructor for fixed length evaluation
//
// The evaluation ends after nog groups of sog values each.
//   from, to     : histogram range; from must be smaller than to
//   inter        : number of histogram intervals (values <= 0 select 10)
//   sog          : size of groups (values <= 0 select 1000)
//   nog          : number of groups, at least 3
//   conf         : confidence level in percent, 95 or 99 (else 95 is used)
//   nname, ttext : handed on to the CNStatistics base class
CNBatchMeans::CNBatchMeans(double from, double to, long inter,
                           long sog, long nog, short conf,
                           const char *nname, const char *ttext)
    : CNStatistics(nname, ttext)
{
    // histogram range
    bottom = from;
    top    = to;
    if (bottom >= top)
    {
        fatal("CNBatchMeans:", " wrong limits: top <= bottom.");
        exit(1);
    }

    // number and width of the histogram intervals
    if (inter > 0)
        intervals = (unsigned long) inter;
    else
        intervals = 10ul;
    int_step = (top - bottom) / intervals; // width of one interval

    // group (batch) size
    if (sog > 0)
        size_of_groups = (unsigned long) sog;
    else
        size_of_groups = 1000ul;
    if (sog < 100)
        warning("CNBatchMeans:", " size of groups is low.\n",
                "\tNormal distribution of y_i cannot be guaranteed.");

    // number of groups
    if (nog < 3)
    {
        fatal("CNBatchMeans:", " need at least 3 groups to provide quasi\n",
              "\tnormal distribution and quasi independence of y_i.");
        exit(1);
    }
    number_of_groups = (unsigned long) nog;

    // confidence level
    if (conf == 95 || conf == 99)
        confidence = conf;
    else
    {
        warning("CNBatchMeans:",
                " requested confidence value not supported.\n",
                "\t Confidence set to 95.");
        confidence = 95;
    }

    fixed_length = true; // n = nog * sog

    // start with an empty evaluation
    phase        = ITERATE;
    end_reached  = false;
    nrv          = 0;
    wasted_left  = 0;
    wasted_right = 0;
    min_val      = DBL_MAX;
    max_val      = -DBL_MAX;
    sum_y        = 0.0;
    qsum_y       = 0.0;
    sum_gr_var   = 0.0;
    qsum_gr_var  = 0.0;

    // one entry per interval plus one for the upper limit
    result = new Interval[intervals+1];

    // group means, kept for the correlation coefficient
    const unsigned long slots = number_of_groups + 1;
    y_i = new double[slots];
    for (unsigned long k = 0; k < slots; k++)
        y_i[k] = 0.0;

    actual_group = new Group(size_of_groups, intervals);
    act_group = 0;
}



// constructor for variable {f(err)} length evaluation
//
// The evaluation ends as soon as the relative Bayes error of the mean
// drops to err (checked in put() after each completed group).
//   from, to     : histogram range; from must be smaller than to
//   inter        : number of histogram intervals (values <= 0 select 10)
//   sog          : size of groups (values <= 0 select 1000)
//   err          : maximum relative error, 0.0 < err < 1.0
//   conf         : confidence level in percent, 95 or 99 (else 95 is used)
//   nname, ttext : handed on to the CNStatistics base class
CNBatchMeans::CNBatchMeans(double from, double to, long inter,
                           long sog, double err, short conf,
                           const char *nname, const char *ttext)
    : CNStatistics(nname, ttext)
{
    // histogram range
    bottom = from;
    top    = to;
    if (bottom >= top)
    {
        fatal("CNBatchMeans:", " wrong limits: top <= bottom.");
        exit(1);
    }

    // number and width of the histogram intervals
    if (inter > 0)
        intervals = (unsigned long) inter;
    else
        intervals = 10ul;
    int_step = (top - bottom) / intervals; // width of one interval

    // group (batch) size
    if (sog > 0)
        size_of_groups = (unsigned long) sog;
    else
        size_of_groups = 1000ul;
    if (sog < 100)
        warning("CNBatchMeans:", " size of groups is low.\n",
                "\tNormal distribution of y_i cannot be guaranteed.");

    // termination criterion
    if (err <= 0.0 || err >= 1.0)
    {
        fatal("CNBatchMeans:", " 0.0 < error < 1.0 !");
        exit(1);
    }
    max_err = err;

    // confidence level
    if (conf == 95 || conf == 99)
        confidence = conf;
    else
    {
        warning("CNBatchMeans:",
                " requested confidence value not supported.\n",
                "\t Confidence set to 95.");
        confidence = 95;
    }

    fixed_length = false; // n = sog * f(err)

    // start with an empty evaluation
    phase        = ITERATE;
    end_reached  = false;
    nrv          = 0;
    wasted_left  = 0;
    wasted_right = 0;
    min_val      = DBL_MAX;
    max_val      = -DBL_MAX;
    sum_y        = 0.0;
    qsum_y       = 0.0;
    sum_gr_var   = 0.0;
    qsum_gr_var  = 0.0;

    // one entry per interval plus one for the upper limit
    result = new Interval[intervals+1];

    // The number of groups is only a first estimate here; put() enlarges
    // y_i when more groups are needed.
    number_of_groups = 10ul;
    const unsigned long slots = number_of_groups + 1;
    y_i = new double[slots];
    for (unsigned long k = 0; k < slots; k++)
        y_i[k] = 0.0;

    actual_group = new Group(size_of_groups, intervals);
    act_group = 0;
}



// destructor: releases the three heap objects allocated by the constructors
CNBatchMeans::~CNBatchMeans()
{
    delete [] result;    // per-interval statistics
    delete [] y_i;       // stored group means
    delete actual_group; // accumulator of the group in progress
}



/*** Group functions ***/
// 1.) constructor
//     sizeg: number of values per group, sizei: number of histogram cells
CNBatchMeans::Group::Group(unsigned long sizeg, unsigned long sizei)
{
    inter = sizei;
    histo = new unsigned long[inter]; // histogram
    unsigned long cell = 0;
    while (cell < inter)
    {
        histo[cell] = 0;
        ++cell;
    }

    lower  = 0;
    higher = 0;
    sum_x  = 0.0;
    qsum_x = 0.0;

    sog = sizeg;
    remaining = sog; // counts down to 0 while the group fills
}

// 2.) destructor: frees the histogram array allocated by the constructor
CNBatchMeans::Group::~Group()
{
    delete [] histo;
}

// 3.) reset: clears histogram, counters and sums; a full group is remaining
void CNBatchMeans::Group::reset()
{
    unsigned long cell = inter;
    while (cell > 0)
        histo[--cell] = 0;

    lower  = 0;
    higher = 0;
    sum_x  = 0.0;
    qsum_x = 0.0;
    remaining = sog;
}

    
    
/*** Interval function ***/
// 1.) constructor: every counter and sum starts at zero
CNBatchMeans::Interval::Interval()
{
    hits    = 0;
    sum_rh  = 0.0;
    qsum_rh = 0.0;
    sum_f   = 0.0;
    qsum_f  = 0.0;
    sum_g   = 0.0;
    qsum_g  = 0.0;
}

// 2.) reset: puts every counter and sum back to zero
void CNBatchMeans::Interval::reset()
{
    hits    = 0;
    sum_rh  = 0.0;
    qsum_rh = 0.0;
    sum_f   = 0.0;
    qsum_f  = 0.0;
    sum_g   = 0.0;
    qsum_g  = 0.0;
}



// reset BatchMeans
//
// Discards all collected data and restarts the evaluation. The
// configuration (limits, intervals, group size, confidence, length mode)
// and the current size of the y_i array are kept.
void CNBatchMeans::reset()
{
    phase        = ITERATE;
    end_reached  = false;
    nrv          = 0;
    wasted_left  = 0;
    wasted_right = 0;
    min_val      = DBL_MAX;
    max_val      = -DBL_MAX;
    sum_y        = 0.0;
    qsum_y       = 0.0;
    sum_gr_var   = 0.0;
    qsum_gr_var  = 0.0;

    // per-interval statistics (including the extra entry for the top limit)
    const unsigned long cells = intervals + 1;
    for (unsigned long c = 0; c < cells; c++)
        result[c].reset();

    // stored group means
    const unsigned long slots = number_of_groups + 1;
    for (unsigned long s = 0; s < slots; s++)
        y_i[s] = 0.0;

    actual_group->reset();
    act_group = 0;
}



// put values into evaluation
//
// Adds the sample x to the group in progress. Values x <= bottom and
// x > top are only counted (left/right waste); all other values are
// entered into the histogram of the group. When the group is complete it
// is evaluated and it is checked whether the evaluation has finished:
//   - fixed length:    number_of_groups groups have been evaluated
//   - variable length: more than 4 groups have been evaluated and
//                      bayes_err() <= max_err
// Once the end is reached further calls are ignored.
void CNBatchMeans::put(double x)
{
    if (end_reached) return;  // no more evaluation when ready

    nrv++;

    // check for min, max
    if (x < min_val) min_val = x;
    if (x > max_val) max_val = x;

    // calculate group stats
    actual_group->sum_x  += x;
    actual_group->qsum_x += x * x;

    // enter into histogram
    if (x <= bottom)
    {
        wasted_left++;
        actual_group->lower++;
    }
    else if (x > top)
    {
        wasted_right++;
        actual_group->higher++;
    }
    else
    {
        // x values on intervals left limit belong to next lower interval
        //{F(x) = Prob[y <= x]; G(x) = Prob[y > x]}
        unsigned long offset = (unsigned long)((top - x) / int_step);
        // For x just above bottom rounding can make the quotient reach
        // 'intervals'; without the clamp the index below would wrap
        // around and the write would go far outside the histogram.
        if (offset >= intervals) offset = intervals - 1;
        actual_group->histo[ intervals - 1 - offset ]++;
    }

    // group ready?
    if (-- actual_group->remaining == 0)
    {
        eval_group();
        actual_group->reset(); // begin next group
        act_group++;
        // check if evaluation is ready
        if (( fixed_length && act_group == number_of_groups) ||
            (!fixed_length && (act_group>4) &&
             (bayes_err() <= max_err)))
            // possible alternative:
            /*(mean_confidence() / mean() <= max_err)))*/
        {
            phase = END;
            end_reached = true;
        }
        // need to create more storage for y_i ?
        if (!fixed_length && (act_group >= number_of_groups))
        {
            // double the capacity, copy the old means, zero the new part
            unsigned long i;
            double *h = new double[number_of_groups * 2 + 1];
            for (i = 0; i < number_of_groups+1; i++)
                h[i] = y_i[i];
            for (i = number_of_groups+1; i < (2 * number_of_groups + 1); i++)
                h[i] = 0.0;
            number_of_groups *= 2;
            delete [] y_i;
            y_i = h;
        }
    }
}



// mean value (Batch-Means variant): average of the group means;
// DBL_MAX as long as no group has been evaluated
double CNBatchMeans::mean() const
{
    if (act_group == 0)
        return DBL_MAX;
    return sum_y / act_group;
}

// variance (Batch-Means variant): average of the group variances;
// DBL_MAX as long as no group has been evaluated
double CNBatchMeans::variance() const
{
    if (act_group == 0)
        return DBL_MAX;
    return sum_gr_var / act_group;
}

// sigma (an alternate error measurement): square root of half the
// variance of the group means
double CNBatchMeans::sigma() const
{
    return sqrt(calc_var(act_group, sum_y, qsum_y) / 2);
}

// rel. bayes error of mean; DBL_MAX unless more than 5 groups have been
// evaluated
double CNBatchMeans::bayes_err() const
{
    if (act_group <= 5)
        return DBL_MAX;

    const double scale  = act_group / (act_group - 5.0);
    const double spread = qsum_y / (sum_y * sum_y) - 1.0 / act_group;
    return sqrt(scale * spread);
}

// confidence of mean (don't be too confident however!)
// DBL_MAX unless at least 2 groups have been evaluated
double CNBatchMeans::mean_confidence() const
{
    if (act_group <= 1)
        return DBL_MAX;
    return calc_confidence(act_group-1,
                           calc_var(act_group, sum_y, qsum_y));
}

// correlation coefficient 1st order (should be nearly 0)
// computed from the products of the deviations of neighbouring group means
double CNBatchMeans::correlation() const
{
    // with fewer than two groups there is no pair to correlate
    if (act_group < 2) return 0.0;

    const double mue = mean();
    double products = 0.0;
    for (unsigned long k = 1; k < act_group; k++)
        products += ((y_i [k-1] - mue) * (y_i [k] - mue));

    return products
        / ((act_group-1) * calc_var(act_group, sum_y, qsum_y));
}



// print results
//
// Writes the evaluation report to strm: a header of lines starting with
// '#' (name, text, group information, limits, waste counts, min/max, mean,
// variance, sigma, correlation) followed by one table row per interval and
// a last row for the top limit. type selects the first table column:
// F(x) for CNStatistics::DF, G(x) for every other value.
//
// The float format flags and the precision of strm are changed and not
// restored.
void CNBatchMeans::print(CNStatistics::Type type, ostream& strm) const
{
    const bool want_df = (type == CNStatistics::DF);

    strm << "#BATCHMEANS RESULT (THIS IS A MAGIC LINE)" << endl;
    strm << "#------------------------------------------------------------------------------" << endl;
    strm << "#name: " << name << endl;
    strm << "#text: " << text << endl;
    strm << "#size of groups: " << size_of_groups;
    if (fixed_length)
        strm << "\t" << act_group << " out of " << number_of_groups
             << " groups evaluated." << endl;
    else
        strm << "\t evaluated groups: " << act_group << "\tmax.rel.err: "
             << max_err << endl;
    strm.setf(ios::scientific, ios::floatfield); strm.precision(6);
    strm << "#bottom: " << bottom << "\ttop: " << top << "\tno. of intervals: "
         << intervals << endl;
    // The floatfield mask is required here: without it the scientific bit
    // stays set next to the fixed bit and the stream does not use fixed
    // notation for the percentages.
    strm.setf(ios::fixed, ios::floatfield); strm.precision(2);
    strm << "#no of values: " << nrv << "\t" << wasted_left
         << " values < " << bottom << " (" << wasted_left*100 / double(nrv)
         << "%)\t" << wasted_right << " values >= " << top << " ("
         << wasted_right*100 / double(nrv) << "%)" << endl;
    strm.setf(ios::scientific, ios::floatfield); strm.precision(6);
    strm << "#min. value: " << min_val << "\tmax. value: " << max_val
         << endl;
    strm << "#mean:     " << mean() << "\t+- " << mean_confidence() << " ("
         << confidence << "%)\t(rel. error: " << bayes_err() << ")" << endl;
    double var_var = calc_var(act_group, sum_gr_var, qsum_gr_var);
    double bayes_var_error =
        act_group > 7 ? sqrt(2.0 / (act_group - 7)) : DBL_MAX;
    strm << "#variance: " << variance() << "\t+- "
         << calc_confidence(act_group-1, var_var) << " (" << confidence
         << "%)\t(rel. error: " << bayes_var_error << ")" << endl;
    strm << "#sigma: " << sigma() << "\t1st order corr. coeff.: "
         << correlation() << endl;
    strm << "#------------------------------------------------------------------------------" << endl;

    strm << (want_df ? "#F(x)" : "#G(x)");
    strm << "          x              rel. err       rel.prob.      conf"
         << endl;
    strm.setf(ios::scientific, ios::floatfield);
    strm.setf(ios::right); strm.precision(6);

    // fh/gh run along the table: F resp. G at the left limit of interval i
    double fh = wasted_left / double(nrv), gh = 1.0 - fh;
    for (unsigned long i = 0; i < intervals; i++)
    {
        const Interval *res = &result[i];
        const double rh = res->hits / double(nrv);

        // sums of the selected function over all evaluated groups
        const double sum  = want_df ? res->sum_f  : res->sum_g;
        const double qsum = want_df ? res->qsum_f : res->qsum_g;

        double fg_err = calc_error(act_group, sum, qsum);
        // if error cannot be calculated transpose value from DBL_MAX
        // to 0.0 for use with drawbtm tool
        if (fg_err == DBL_MAX) fg_err = 0.;
        const double fg_conf =
            calc_confidence(act_group-1, calc_var(act_group, sum, qsum));

        strm << (want_df ? fh : gh) << "   " << bottom + i * int_step
             << "   " << fg_err << "   " << rh << "   " << fg_conf << endl;

        fh += rh; gh -= rh;
    }

    // closing row for the top limit
    strm << (want_df ? fh : gh) << "   " << top << "   " << .0 << "   "
         << .0 << "   " << .0 << endl;
}


// print results with the F(x) column (CNStatistics::DF) selected
void CNBatchMeans::print(ostream& strm) const
{
    this->print(CNStatistics::DF, strm);
}
    
	
// probability of x (more exact probability of interval x belongs to)
//
// The value ranges match put(): x <= bottom belongs to the left waste,
// x > top to the right waste, everything else to a histogram interval.
// Before, x == bottom was sent to the interval lookup, where the index
// wrapped below zero and the read went out of bounds, and x == top was
// reported as right waste although put() counts it in the last interval.
double CNBatchMeans::p(double x)
{
    if (x <= bottom) return wasted_left / double(nrv);
    if (x > top)     return wasted_right / double(nrv);

    unsigned long offset = (unsigned long)((top - x) / int_step);
    // rounding can make the quotient reach 'intervals' for x just above
    // bottom; keep the index inside the table
    if (offset >= intervals) offset = intervals - 1;
    return result[ intervals - 1 - offset ].sum_rh / act_group;
}

// f(x) (it is f[left interval limit x belongs to])
//
// Returns 0.0 below bottom and 1.0 - wasted_right / nrv from top on;
// in between the mean over all evaluated groups of F at the left limit
// of the interval found for x.
double CNBatchMeans::f(double x)
{
    if (x < bottom) return 0.0;
    else if (x >= top) return 1.0 - wasted_right / double(nrv);

    unsigned long offset = (unsigned long)((top - x) / int_step);
    // For x == bottom (and, by rounding, x just above it) the quotient
    // reaches 'intervals'; unclamped the index would wrap below zero and
    // the read would go out of bounds. Entry 0 holds F at bottom.
    if (offset >= intervals) offset = intervals - 1;
    return result[ intervals - 1 - offset ].sum_f / act_group;
}

// g(x) (it is g[left interval limit] x belongs to)
//
// Returns 1.0 below bottom and wasted_right / nrv from top on; in
// between the mean over all evaluated groups of G at the left limit of
// the interval found for x.
double CNBatchMeans::g(double x)
{
    if(x < bottom) return 1.0;
    else if (x >= top) return wasted_right / double(nrv);

    unsigned long offset = (unsigned long)((top - x) / int_step);
    // For x == bottom (and, by rounding, x just above it) the quotient
    // reaches 'intervals'; unclamped the index would wrap below zero and
    // the read would go out of bounds. Entry 0 holds G at bottom.
    if (offset >= intervals) offset = intervals - 1;
    return result[ intervals - 1 - offset ].sum_g / act_group;
}



// a line of the result
const struct CNBatchMeans::resultline *CNBatchMeans::get_result(long index)
{
    if ((index < min_index()) || (index > max_index())) return NIL;

    Interval *res = &result[index];
    line.x  = bottom + index * int_step;
    line.rh = res->sum_rh / act_group;
    line.fx = res->sum_f  / act_group;
    line.gx = res->sum_g  / act_group;
    line.ferr = calc_error(act_group, res->sum_f, res->qsum_f);
    line.gerr = calc_error(act_group, res->sum_g, res->qsum_g);
    line.fconf = calc_confidence(act_group-1,
				 calc_var(act_group, res->sum_f, res->qsum_f));
    line.gconf = calc_confidence(act_group-1,
				 calc_var(act_group, res->sum_g, res->qsum_g));

    return &line;
}






// evaluate a single batch
void CNBatchMeans::eval_group()
{
    // calculate group mean and group variance
    double g_mean = actual_group->sum_x / size_of_groups;
    double g_var  = (actual_group->qsum_x - actual_group->sum_x * g_mean)
	/ (size_of_groups - 1);
    
    // build some interesting sums
    sum_y  += g_mean;
    qsum_y += g_mean * g_mean;
    sum_gr_var  += g_var;
    qsum_gr_var += g_var * g_var;

    // store y_i
    y_i[ act_group ] = g_mean;
    
    // do interval calculation
    double rh,
	fh = actual_group->lower / double(size_of_groups),
	gh = 1.0 - fh;
    Interval * res;
    for (unsigned long i = 0; i < intervals; i++)
    {
	res = &result[i]; // hope it is faster this way
	rh = actual_group->histo[i] / double(size_of_groups);
	res->hits += actual_group->histo[i];
	res->sum_rh  += rh;
	res->qsum_rh += rh * rh;
	res->sum_f   += fh;
	res->qsum_f  += fh * fh;
	res->sum_g   += gh;
	res->qsum_g  += gh * gh;
	fh += rh; gh -= rh;
    }
    res = &result[intervals];
    res->hits += actual_group->higher;
    res->sum_f   += fh;
    res->qsum_f  += fh * fh;
    res->sum_g   += gh;
    res->qsum_g  += gh * gh;
}


    
// calculate variance of n values from their sum (sum_x) and their sum of
// squares (qsum_x); DBL_MAX for fewer than 2 values
double CNBatchMeans::calc_var(unsigned long n,
                              double sum_x, double qsum_x) const
{
    if (n <= 1)
        return DBL_MAX;
    return (qsum_x - sum_x * sum_x / n) / (n - 1);
}

// calculate bayes_err (relative) of n values from their sum and their sum
// of squares; DBL_MAX if both sums are zero or n is not larger than 5
double CNBatchMeans::calc_error(unsigned long n,
                                double sum_x, double qsum_x) const
{
    const bool no_data = (sum_x == 0.0) && (qsum_x == 0.0);
    if (no_data || n <= 5)
        return DBL_MAX;

    const double mue = sum_x / n;
    const double var = calc_var(n, sum_x, qsum_x);
    return sqrt(var * (n - 1) / (n * (n - 5))) / mue;
}
    
// calculate confidence interval (it's ugly, so i put it at the end where
// hopefully nobody will look for it :-)
//
// Returns table[freedom] * sqrt(var / n) with n = freedom + 1 values
// behind the variance estimate var. The table is selected by the
// 'confidence' member (95 -> tvert975.dat, otherwise tvert995.dat);
// presumably the tables hold Student-t quantiles -- confirm against the
// .dat files.
//
// n is taken BEFORE freedom is clamped to the table size. Using the
// clamped value made the interval stop shrinking once more than 102
// values were available, because var was then always divided by 102.
//
// NOTE(review): callers pass act_group-1; with act_group == 0 this wraps
// to a huge value and the result is meaningless (as it was before).
double CNBatchMeans::calc_confidence(unsigned long freedom, double var) const
{
    const double n = double(freedom) + 1.0;

    // only first 100 degrees of freedom are calculated exact
    freedom = CNmin(freedom, 101ul);
    if (confidence == 95)
    {
#define VERT975(a) a
        static double tvert975[] = {
#include "tvert975.dat"
        };
        return tvert975[ freedom ] * sqrt(var / n);
    }
    else
    {
#define VERT995(a) a
        static double tvert995[] = {
#include "tvert995.dat"
        };
        return tvert995[ freedom ] * sqrt(var / n);
    }
}


	

/***** Default I/O member function for CNCL classes **************************/

// Debug output: writes a one-line tag with the class name and revision
void CNBatchMeans::dump(ostream &strm) const
{
    strm << "CNBatchMeans { $Revision: 1.4 $ ...";
    strm << " }" << endl;
}

// IOStream operator << for objects: prints the evaluation report of obj
// (see print()) and returns the stream for chaining
ostream &operator << (ostream &strm, const CNBatchMeans &obj)
{
    const CNBatchMeans *stats = &obj;
    stats->print(strm);
    return strm;
}

// IOStream operator << for pointers: prints the evaluation report of *obj,
// or the text "(NIL)" for a null pointer
ostream &operator << (ostream &strm, const CNBatchMeans *obj)
{
    if (!obj)
    {
        strm << "(NIL)";
        return strm;
    }

    obj->print(strm);
    return strm;
}



/***** CNCL stuff for type information ***************************************/

// Describing object for class CNBatchMeans: built from the class name, the
// revision string and CNBatchMeans::new_object. File-local (static); it is
// reached from outside through CN_BATCHMEANS below.
// NOTE(review): new_object is presumably the factory CNCL uses to create
// instances by class description -- confirm in Class.h / BatchMeans.h.
static CNClass CNBatchMeans_desc("CNBatchMeans", "$Revision: 1.4 $",
			    CNBatchMeans::new_object);

// "Type" for type checking functions: holds the address of the descriptor
// above.
CNClassDesc CN_BATCHMEANS = &CNBatchMeans_desc;
