I have updated some of the ANOVA code.
Unfortunately, I am quite busy with my own work, hence the significant delay. But I was able to - hopefully - change the ANOVA code into something that works. The code still needs to be adapted for the multiple-matrices scenario, a UI has to be implemented, and much more. BUT this is the core code implementing the ANOVA calculations. The rest can be done easily by those who understand the OOo code. Unfortunately, the lack of useful comments prevents me from gaining a deeper understanding of OOo. (I looked at ScInterpreter::IterateParameters(), BUT did not understand much.)
Calc-ANOVA-No_ARRAY.cpp
- contains the code for one-way ANOVA (NO block type)
- all input is in the form of one data matrix (every column is a different variable)
- have removed most dynamic arrays (except for two, which are initialized and destroyed properly)
ANOVA.Test.Code.cpp
- contains the same C++ code adapted so that it can be compiled with MS VC 6.0 (independent of OOo code)
- this way, it is possible to test the code without having to (re)compile OOo
- see also the 'ANOVA-Test.gnumeric'
ANOVA-Test.gnumeric
- gnumeric file to test the results of the ANOVA code
- it actually was correct
Kind regards, Leonard Mada
void ScInterpreter::ScANOVA()
{
// WE GET EITHER A SINGLE MATRIX WHERE EVERY COLUMN IS A SEPARATE
VARIABLE
// DISADVANTAGE: ONLY ONE COLUMN PER VARIABLE
// ADVANTAGE: FAST AND EASY TO WRITE
// OR MULTIPLE MATRICES, EACH MATRIX IS ONE VARIABLE
// DISADVANTAGE:
// [CALC FUNCTIONS ACCEPT ONLY 30 PARAMS
// SO THERE ARE AT MOST 30 VARIABLES]
// CUMBERSOME TO WRITE ALL MATRICES
// STORES ACTUALLY THE NUMBER OF VARIABLES
SCSIZE iVarNr = GetByte() /* NUMBER OF PARAMETERS */;
SCSIZE iVarTmp = iVarNr; // NUMBER OF DATA MATRICES
if ( iVarNr == 0 /* NO PARAMETERS */)
return; // EXIT
if ( iVarNr == 1 /* ONLY ONE PARAMETER */ ) {
// SEE ONE PARAMETER CASE ::ScANOVAMono()
// CALL TO THAT FUNCTION OR USE SWITCH CASE
return;
}
// ...
// MORE COMPLEX CODE
// IN PRINCIPLE SIMILAR TO THE MORE SIMPLE ScInterpreter::ScANOVAMono()
}
void ScInterpreter::ScANOVAMono()
{
// WE GOT A SINGLE MATRIX WHERE EVERY COLUMN IS A SEPARATE VARIABLE
// DISADVANTAGE: ONLY ONE COLUMN PER VARIABLE
// BUT IT IS EASYER TO USE AND IT IS NOT LIMITED TO 30 VARIABLES
ScMatrixRef pMat = GetMatrix();
if (!pMat) {
// NO DATA MATRIX - INVALID PARAMETERS
SetIllegalParameter();
return;
}
SCSIZE iVarNr, nRMax;
// WE HAVE ONLY ONE MATRIX
// WE CONSIDER EVERY COLUMN AS A SEPARATE DATA SET
pMat->GetDimensions(iVarNr, nRMax);
if( iVarNr == 1 ) {
SetNoValue();
return; // ONLY ONE VARIABLE - ANOVA NOT POSSIBLE
}
SCSIZE *nR = new unsigned int[iVarNr];
// THIS WILL STORE THE NUMBER OF DATA VALUES FOR EACH VARIABLE
SCSIZE dfB = 0; // DEGREES OF FREEDOM
SCSIZE dfE = 0; // DEGREES OF FREEDOM
SCSIZE N = 0; // TOTAL NUMBER OF DATA VALUES
SCSIZE iCount = 0; // INDEX POINTING TO CURRENT VARIABLE
SCSIZE jCount = 0; // NUMBER OF VALUES FOR CURRENT VARIABLE
double fSumM = 0.0; // THIS IS THE GRAND MEAN
// WE NEED AN ARRAY TO STORE THE MEAN FOR EVERY GROUP (VARIABLE)
double *fSumX = new double[iVarNr]; // THE MEANS FOR THE INDIVIDUAL VARS
double fValX; // THE
INDIVIDUAL VALUES
for (iCount = 0; iCount < iVarNr; iCount++) {
fSumX[iCount] = 0.0; // INITIALIZE THE PARTIAL SUM
for (SCSIZE j = 0; j < nRMax; j++)
{
if (!pMat->IsString(iCount,j))
{
fSumX[iCount] +=
pMat->GetDouble(iCount,j);
jCount++;
}
}
fSumM += fSumX[iCount];
// GRAND TOTAL
fSumX[iCount] = fSumX[iCount] / jCount; // THIS IS THE
MEAN
nR[iCount] = jCount; // STORE HOW MANY DATA VALUES WE
HAVE
N += jCount; // THIS IS THE TOTAL NUMBER OF
VALUES
jCount = 0; // RESET jCount FOR NEXT
VARIABLE
} // END OUTER FOR LOOP
dfB = iCount - 1; // CALCULATE THE DEGREES OF FREEDOM (df)
dfE = N - iCount;
fSumM = fSumM / N; // THIS IS THE GRAND MEAN
double fMSB = 0.0; // THIS IS INTER-GROUP VARIANCE
double fMSE = 0.0; // THIS IS INTRA-GROUP VARIANCE (DUE TO ERROR)
for (iCount = 0; iCount < iVarNr; iCount++) {
for (jCount = 0; jCount < nRMax; jCount++)
{
if (!pMat->IsString(iCount,jCount))
{
fValX = pMat->GetDouble(iCount,jCount);
fMSE += (fValX - fSumX[iCount]) * (fValX -
fSumX[iCount]);
}
}
fMSB += nR[iCount] * (fSumM - fSumX[iCount]) * (fSumM -
fSumX[iCount]);
} // END OUTER FOR LOOP
fMSB = fMSB / dfB;
fMSE = fMSE / dfE;
PushDouble( fMSB/fMSE );
// TODO:
// - WE STILL NEED TO INTERPRET fMSB/fMSE USING THE F STATISTICS
// - THIS IS DONE USING: =FDIST(fMSB/fMSE; dfB; dfE)
delete nR;
delete fSumX;
}
// This application tests the ANOVA code, independently of OOo
// Just run the application and compare the F-statistic
// from the outputted (last value) with that computed
// using gnumeric
// Compiled with MS VC 6.0 (1998)
#include <stdio.h>
#include "iostream"
using namespace std;
///////////////////
// We emulate some of the OOo routines and functions
// Stand-in for the SCSIZE type used by the ANOVA code below, so that the
// code can be compiled without the OOo headers.
typedef unsigned int SCSIZE;
// Emulates the interpreter routine of the same name: just reports the
// condition on the wide-character console.
void SetNoValue()
{
    std::wcout << L"No Value";
}
// Emulates the interpreter routine of the same name: just reports the
// condition on the wide-character console.
void SetIllegalParameter()
{
    std::wcout << L"Illegal Parameter";
}
// Emulates the interpreter routine of the same name: instead of pushing the
// result onto an interpreter stack, prints it as the F statistic.
void PushDouble(double i)
{
    std::wcout << L"\nF-Statistic: " << i;
}
///////////////////////////////////
// More emulation
// Test stand-in for the matrix type used by the ANOVA code. It stores only
// its two dimensions; the cell values are synthesised on the fly by
// GetDouble(), and no cell is ever reported as a string.
class ScMatrixRef {
public:
    // i and j are handed back unchanged by GetDimensions().
    ScMatrixRef(SCSIZE i, SCSIZE j) : ic(i), jc(j) {}

    // Returns the two dimensions in the order they were given to the
    // constructor.
    void GetDimensions(SCSIZE &iR, SCSIZE &jR) {
        iR = ic;
        jR = jc;
    }

    // Every cell counts as numeric in this emulation.
    bool IsString(SCSIZE i, SCSIZE j) { return false; }

    bool IsMatrix() const { return true; }

    // Lets "if (!pMat)" compile as it does with the real type; always false
    // here because IsMatrix() is always true.
    bool operator ! () const { return !IsMatrix(); }

    // Synthetic test data: j * (i - 1) / (i + 1), where a first index that
    // is not greater than 1 is replaced by 1.1.
    double GetDouble(double i, double j) {
        const double fFirst = (i > 1) ? i : 1.1;
        return j * (fFirst - 1) / (fFirst + 1);
    }

private:
    const SCSIZE ic;
    const SCSIZE jc;
};
// The one test matrix of this program, with dimensions 5 and 10.
ScMatrixRef MatrixClass(5, 10);

// Emulates the interpreter's GetMatrix(): always hands out the test matrix.
ScMatrixRef &GetMatrix()
{
    return MatrixClass;
}
///////////////////////////////////
int main(int argc, char* argv[])
{
// THIS IS THE IMPORTANT CODE
// WE GOT A SINGLE MATRIX WHERE EVERY COLUMN IS A SEPARATE VARIABLE
// DISADVANTAGE: ONLY ONE COLUMN PER VARIABLE
// BUT IT IS EASYER TO USE AND IT IS NOT LIMITED TO 30 VARIABLES
ScMatrixRef pMat = GetMatrix();
if (!pMat) {
// NO DATA MATRIX - INVALID PARAMETERS
SetIllegalParameter();
return 1;
}
SCSIZE iVarNr /* nC */, nRMax;
// WE HAVE ONLY ONE MATRIX
// WE CONSIDER EVERY COLUMN AS A SEPARATE DATA SET
pMat.GetDimensions(iVarNr /* nC */, nRMax);
// iVarNr = nC; // nC IS NOT USED ANY FURTHER
if( iVarNr == 1 ) {
SetNoValue();
return 1; // ONLY ONE VARIABLE - ANOVA NOT POSSIBLE
}
SCSIZE *nR = new unsigned int[iVarNr];
// THIS WILL STORE THE NUMBER OF DATA VALUES FOR EACH VARIABLE
SCSIZE dfB = 0; // DEGREES OF FREEDOM
SCSIZE dfE = 0; // DEGREES OF FREEDOM
SCSIZE N = 0; // TOTAL NUMBER OF DATA VALUES
SCSIZE iCount = 0; // INDEX POINTING TO CURRENT VARIABLE
SCSIZE jCount = 0; // NUMBER OF VALUES FOR CURRENT VARIABLE
double fSumM = 0.0; // THIS IS THE GRAND MEAN
// WE NEED AN ARRAY TO STORE THE MEAN FOR EVERY GROUP (VARIABLE)
double *fSumX = new double[iVarNr]; // THE MEANS FOR THE INDIVIDUAL VARS
double fValX; // THE
INDIVIDUAL VALUES
for (iCount = 0; iCount < iVarNr; iCount++) {
fSumX[iCount] = 0.0; // INITIALIZE THE SUM
for (SCSIZE j = 0; j < nRMax; j++)
{
if (!pMat.IsString(iCount,j))
{
fSumX[iCount] +=
pMat.GetDouble(iCount,j);
jCount++;
}
}
fSumM += fSumX[iCount];
// GRAND TOTAL
fSumX[iCount] = fSumX[iCount] / jCount; // THIS IS THE
MEAN
nR[iCount] = jCount; // STORE HOW MANY DATA VALUES WE
HAVE
N += jCount; // THIS IS THE TOTAL NUMBER OF VALUES
jCount = 0; // RESET jCount FOR NEXT VARIABLE
wcout << L"Partial Sum: " << fSumX[iCount] << L"\n"; //
TEST
} // END OUTER FOR LOOP
dfB = iCount - 1; // CALCULATE THE DEGREES OF FREEDOM (df)
dfE = N - iCount;
fSumM = fSumM / N; // THIS IS THE GRAND MEAN
double fMSB = 0.0; // THIS IS INTER-GROUP VARIANCE
double fMSE = 0.0; // THIS IS INTRA-GROUP VARIANCE (DUE TO ERROR)
for (iCount = 0; iCount < iVarNr; iCount++) {
for (jCount = 0; jCount < nRMax; jCount++)
{
if (!pMat.IsString(iCount,jCount))
{
fValX = pMat.GetDouble(iCount,jCount);
fMSE += (fValX - fSumX[iCount]) * (fValX -
fSumX[iCount]);
}
}
fMSB += nR[iCount] * (fSumM - fSumX[iCount]) * (fSumM -
fSumX[iCount]);
wcout << L"\n";
} // END OUTER FOR LOOP
fMSB = fMSB / dfB;
fMSE = fMSE / dfE;
PushDouble( fMSB/fMSE );
// TODO:
// - WE STILL NEED TO INTERPRET fMSB/fMSE USING THE F STATISTICS
// - THIS IS DONE USING: =FDIST(fMSB/fMSE; dfB; dfE)
delete nR;
delete fSumX;
//
//if(AutoExit!=YES)
{wprintf(L"\n\nHit ENTER to terminate the program
");_flushall();getchar();}
//
return 0;
}
ANOVA-Test.gnumeric
Description: application/gnumeric
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]
