Author: psteitz
Date: Sat Jan 26 14:42:43 2008
New Revision: 615531
URL: http://svn.apache.org/viewvc?rev=615531&view=rev
Log:
Added one-way ANOVA implementation.
JIRA: MATH-173
Contributed by Bob MacCallum.
Added:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnova.java
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
commons/proper/math/trunk/src/test/R/anovaTestCases
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/OneWayAnovaTest.java
Modified:
commons/proper/math/trunk/pom.xml
commons/proper/math/trunk/src/test/R/testAll
commons/proper/math/trunk/xdocs/changes.xml
Modified: commons/proper/math/trunk/pom.xml
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/pom.xml?rev=615531&r1=615530&r2=615531&view=diff
==============================================================================
--- commons/proper/math/trunk/pom.xml (original)
+++ commons/proper/math/trunk/pom.xml Sat Jan 26 14:42:43 2008
@@ -119,6 +119,9 @@
<name>Piotr Kochanski</name>
</contributor>
<contributor>
+ <name>Bob MacCallum</name>
+ </contributor>
+ <contributor>
<name>Fredrik Norin</name>
</contributor>
<contributor>
Added:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnova.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnova.java?rev=615531&view=auto
==============================================================================
---
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnova.java
(added)
+++
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnova.java
Sat Jan 26 14:42:43 2008
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+import java.util.Collection;
+
+/**
+ * An interface for one-way ANOVA (analysis of variance).
+ *
+ * <p> Tests for differences between two or more categories of univariate data
+ * (for example, the body mass index of accountants, lawyers, doctors and
+ * computer programmers). When two categories are given, this is equivalent to
+ * the [EMAIL PROTECTED] org.apache.commons.math.stat.inference.TTest}.
+ * </p>
+ *
+ * @since 1.2
+ * @version $Revision$ $Date$
+ */
+public interface OneWayAnova {
+ /**
+ * Computes the ANOVA F-value for a collection of <code>double[]</code>
+ * arrays.
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain
+ * <code>double[]</code> arrays.</li>
+ * <li> There must be at least two <code>double[]</code> arrays in the
+ * <code>categoryData</code> collection and each of these arrays must
+ * contain at least two values.</li></ul></p>
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return Fvalue
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if the statistic can not be computed do to a
+ * convergence or other numerical error.
+ */
+ public double anovaFValue(Collection categoryData)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Computes the ANOVA P-value for a collection of <code>double[]</code>
+ * arrays.
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain
+ * <code>double[]</code> arrays.</li>
+ * <li> There must be at least two <code>double[]</code> arrays in the
+ * <code>categoryData</code> collection and each of these arrays must
+ * contain at least two values.</li></ul></p>
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return Pvalue
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if the statistic can not be computed do to a
+ * convergence or other numerical error.
+ */
+ public double anovaPValue(Collection categoryData)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Performs an ANOVA test, evaluating the null hypothesis that there
+ * is no difference among the means of the data categories.
+ *
+ * <p><strong>Preconditions</strong>: <ul>
+ * <li>The categoryData <code>Collection</code> must contain
+ * <code>double[]</code> arrays.</li>
+ * <li> There must be at least two <code>double[]</code> arrays in the
+ * <code>categoryData</code> collection and each of these arrays must
+ * contain at least two values.</li>
+ * <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
+ * </li></ul></p>
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if the statistic can not be computed do to a
+ * convergence or other numerical error.
+ */
+ public boolean anovaTest(Collection categoryData, double alpha)
+ throws IllegalArgumentException, MathException;
+
+}
\ No newline at end of file
Added:
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java?rev=615531&view=auto
==============================================================================
---
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
(added)
+++
commons/proper/math/trunk/src/java/org/apache/commons/math/stat/inference/OneWayAnovaImpl.java
Sat Jan 26 14:42:43 2008
@@ -0,0 +1,208 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import org.apache.commons.math.MathException;
+import org.apache.commons.math.stat.descriptive.summary.Sum;
+import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
+
+import org.apache.commons.math.distribution.FDistribution;
+import org.apache.commons.math.distribution.FDistributionImpl;
+
+import java.util.Collection;
+import java.util.Iterator;
+
+
+/**
+ * Implements one-way ANOVA statistics defined in the [EMAIL PROTECTED]
OneWayAnovaImpl}
+ * interface.
+ *
+ * <p>Uses the
+ * [EMAIL PROTECTED] org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate exact p-values.</p>
+ *
+ * <p>This implementation is based on a description at
+ * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
+ * <pre>
+ * Abbreviations: bg = between groups,
+ * wg = within groups,
+ * ss = sum squared deviations
+ * </pre>
+ *
+ * @since 1.2
+ * @version $Revision$ $Date$
+ */
+public class OneWayAnovaImpl implements OneWayAnova {
+
+ /**
+ * Default constructor.
+ */
+ public OneWayAnovaImpl() {
+ }
+
+ /**
+ * [EMAIL PROTECTED]<p>
+ * This implementation computes the F statistic using the definitional
+ * formula<pre>
+ * F = msbg/mswg</pre>
+ * where<pre>
+ * msbg = between group mean square
+ * mswg = within group mean square</pre>
+ * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
+ * here</a></p>
+ */
+ public double anovaFValue(Collection categoryData)
+ throws IllegalArgumentException, MathException {
+ AnovaStats a = anovaStats(categoryData);
+ return a.F;
+ }
+
+ /**
+ * [EMAIL PROTECTED]<p>
+ * This implementation uses the
+ * [EMAIL PROTECTED] org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula<pre>
+ * p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and
<code>cumulativeProbability</code>
+ * is the commons-math implementation of the F distribution.</p>
+ */
+ public double anovaPValue(Collection categoryData)
+ throws IllegalArgumentException, MathException {
+ AnovaStats a = anovaStats(categoryData);
+ FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
+ return 1.0 - fdist.cumulativeProbability(a.F);
+ }
+
+ /**
+ * [EMAIL PROTECTED]<p>
+ * This implementation uses the
+ * [EMAIL PROTECTED] org.apache.commons.math.distribution.FDistribution
+ * commons-math F Distribution implementation} to estimate the exact
+ * p-value, using the formula<pre>
+ * p = 1 - cumulativeProbability(F)</pre>
+ * where <code>F</code> is the F value and
<code>cumulativeProbability</code>
+ * is the commons-math implementation of the F distribution.</p>
+ * <p>True is returned iff the estimated p-value is less than alpha.</p>
+ */
+ public boolean anovaTest(Collection categoryData, double alpha)
+ throws IllegalArgumentException, MathException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw new IllegalArgumentException("bad significance level: " +
alpha);
+ }
+ return (anovaPValue(categoryData) < alpha);
+ }
+
+
+ /**
+ * This method actually does the calculations (except P-value).
+ *
+ * @param categoryData <code>Collection</code> of <code>double[]</code>
+ * arrays each containing data for one category
+ * @return computed AnovaStats
+ * @throws IllegalArgumentException if categoryData does not meet
+ * preconditions specified in the interface definition
+ * @throws MathException if an error occurs computing the Anova stats
+ */
+ private AnovaStats anovaStats(Collection categoryData)
+ throws IllegalArgumentException, MathException {
+
+ // check if we have enough categories
+ if (categoryData.size() < 2) {
+ throw new IllegalArgumentException(
+ "ANOVA: two or more categories required");
+ }
+
+ // check if each category has enough data and all is double[]
+ for (Iterator iterator = categoryData.iterator(); iterator.hasNext();)
{
+ double[] array;
+ try {
+ array = (double[])iterator.next();
+ } catch (Exception ex) {
+ throw new IllegalArgumentException(
+ "ANOVA: categoryData contains non-double[] elements.",
ex);
+ }
+ if (array.length <= 1) {
+ throw new IllegalArgumentException(
+ "ANOVA: one element of categoryData has fewer than 2
values.");
+ }
+ }
+
+ int dfwg = 0;
+ double sswg = 0;
+ Sum totsum = new Sum();
+ SumOfSquares totsumsq = new SumOfSquares();
+ int totnum = 0;
+
+ for (Iterator iterator = categoryData.iterator(); iterator.hasNext();)
{
+ double[] data = (double[])iterator.next();
+
+ Sum sum = new Sum();
+ SumOfSquares sumsq = new SumOfSquares();
+ int num = 0;
+
+ for (int i = 0; i < data.length; i++) {
+ double val = data[i];
+
+ // within category
+ num++;
+ sum.increment(val);
+ sumsq.increment(val);
+
+ // for all categories
+ totnum++;
+ totsum.increment(val);
+ totsumsq.increment(val);
+ }
+ dfwg += num - 1;
+ double ss = sumsq.getResult() - sum.getResult() * sum.getResult()
/ num;
+ sswg += ss;
+ }
+ double sst = totsumsq.getResult() - totsum.getResult() *
+ totsum.getResult()/totnum;
+ double ssbg = sst - sswg;
+ int dfbg = categoryData.size() - 1;
+ double msbg = ssbg/dfbg;
+ double mswg = sswg/dfwg;
+ double F = msbg/mswg;
+
+ return new AnovaStats(dfbg, dfwg, F);
+ }
+
+ /**
+ Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
+ No get/set methods provided.
+ */
+ private static class AnovaStats {
+ private int dfbg;
+ private int dfwg;
+ private double F;
+
+ /**
+ * Constructor
+ * @param dfbg degrees of freedom in numerator (between groups)
+ * @param dfwg degrees of freedom in denominator (within groups)
+ * @param F statistic
+ */
+ AnovaStats(int dfbg, int dfwg, double F) {
+ this.dfbg = dfbg;
+ this.dfwg = dfwg;
+ this.F = F;
+ }
+ }
+
+}
\ No newline at end of file
Added: commons/proper/math/trunk/src/test/R/anovaTestCases
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/anovaTestCases?rev=615531&view=auto
==============================================================================
--- commons/proper/math/trunk/src/test/R/anovaTestCases (added)
+++ commons/proper/math/trunk/src/test/R/anovaTestCases Sat Jan 26 14:42:43 2008
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#------------------------------------------------------------------------------
+# R source file to validate Binomial distribution tests in
+# org.apache.commons.math.distribution.BinomialDistributionTest
+#
+# To run the test, install R, put this file and testFunctions
+# into the same directory, launch R from this directory and then enter
+# source("<name-of-this-file>")
+#
+# R functions used
+# anova(model) <- anova
+# lm(frame) <- linear model
+#------------------------------------------------------------------------------
+tol <- 1E-12 # error tolerance for tests
+#------------------------------------------------------------------------------
+# Function definitions
+
+source("testFunctions") # utility test functions
+options(digits=16) # override number of digits
displayed
+
+# function to verify anova computations
+
+verifyAnova <- function(frame, expectedP, expectedF, frameName) {
+ a <- anova(lm(frame))
+ p <- a$"Pr(>F)"[1]
+ f <- a$"F value"[1]
+ output <- c("P-value test frame = ", frameName)
+ if (assertEquals(expectedP,p,tol,"P value")) {
+ displayPadded(output, SUCCEEDED, WIDTH)
+ } else {
+ displayPadded(output, FAILED, WIDTH)
+ }
+ output <- c("F-value test frame = ", frameName)
+ if (assertEquals(expectedF,f,tol,"F value")) {
+ displayPadded(output, SUCCEEDED, WIDTH)
+ } else {
+ displayPadded(output, FAILED, WIDTH)
+ }
+}
+
+#--------------------------------------------------------------------------
+cat("Anova test cases\n")
+classA <- c(93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, 94.0, 101.0)
+classB <- c(99.0, 92.0, 102.0, 100.0, 102.0, 89.0)
+classC <- c(110.0, 115.0, 111.0, 117.0, 128.0, 117.0)
+
+threeClasses = data.frame(val = c(classA, classB, classC),
+class=c(rep("classA", length(classA)),
+ rep("classB", length(classB)),
+ rep("classC", length(classC))))
+
+verifyAnova(threeClasses,6.959446e-06, 24.67361709460624, "Three classes")
+
+twoClasses = data.frame(val = c(classA, classB),
+class=c(rep("classA", length(classA)), rep("classB", length(classB))))
+verifyAnova(twoClasses, 0.904212960464, 0.0150579150579, "Two classes")
+
+displayDashes(WIDTH)
\ No newline at end of file
Modified: commons/proper/math/trunk/src/test/R/testAll
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/testAll?rev=615531&r1=615530&r2=615531&view=diff
==============================================================================
--- commons/proper/math/trunk/src/test/R/testAll (original)
+++ commons/proper/math/trunk/src/test/R/testAll Sat Jan 26 14:42:43 2008
@@ -39,6 +39,7 @@
# inference
source("chiSquareTestCases")
+source("anovaTestCases")
# descriptive
source("descriptiveTestCases")
Added:
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/OneWayAnovaTest.java
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/OneWayAnovaTest.java?rev=615531&view=auto
==============================================================================
---
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/OneWayAnovaTest.java
(added)
+++
commons/proper/math/trunk/src/test/org/apache/commons/math/stat/inference/OneWayAnovaTest.java
Sat Jan 26 14:42:43 2008
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.math.stat.inference;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Test cases for the OneWayAnovaImpl class.
+ *
+ * @version $Revision$ $Date$
+ */
+
+public class OneWayAnovaTest extends TestCase {
+
+ protected OneWayAnova testStatistic = new OneWayAnovaImpl();
+
+ private char[] wrongArray = { 'a', 'b', 'c' };
+ private double[] emptyArray = {};
+
+ private double[] classA =
+ {93.0, 103.0, 95.0, 101.0, 91.0, 105.0, 96.0, 94.0, 101.0 };
+ private double[] classB =
+ {99.0, 92.0, 102.0, 100.0, 102.0, 89.0 };
+ private double[] classC =
+ {110.0, 115.0, 111.0, 117.0, 128.0, 117.0 };
+
+ public OneWayAnovaTest(String name) {
+ super(name);
+ }
+
+ public void setUp() {
+ }
+
+ public static Test suite() {
+ TestSuite suite = new TestSuite(OneWayAnovaTest.class);
+ suite.setName("TestStatistic Tests");
+ return suite;
+ }
+
+ public void testAnovaFValue() throws Exception {
+ // Target comparison values computed using R version 2.6.0 (Linux
version)
+ List threeClasses = new ArrayList();
+ threeClasses.add(classA);
+ threeClasses.add(classB);
+ threeClasses.add(classC);
+
+ assertEquals("ANOVA F-value", 24.67361709460624,
+ testStatistic.anovaFValue(threeClasses), 1E-12);
+
+ List twoClasses = new ArrayList();
+ twoClasses.add(classA);
+ twoClasses.add(classB);
+
+ assertEquals("ANOVA F-value", 0.0150579150579,
+ testStatistic.anovaFValue(twoClasses), 1E-12);
+
+ // now try some input hashes which should fail
+ List wrongContents = new ArrayList();
+ wrongContents.add(classC);
+ wrongContents.add(wrongArray);
+ try {
+ double bad = testStatistic.anovaFValue(wrongContents);
+ fail("non double[] hash value for key classX,
IllegalArgumentException expected");
+ } catch (IllegalArgumentException ex) {
+ // expected
+ }
+
+ List emptyContents = new ArrayList();
+ emptyContents.add(emptyArray);
+ emptyContents.add(classC);
+ try {
+ double bad = testStatistic.anovaFValue(emptyContents);
+ fail("empty array for key classX, IllegalArgumentException
expected");
+ } catch (IllegalArgumentException ex) {
+ // expected
+ }
+
+ List tooFew = new ArrayList();
+ tooFew.add(classA);
+ try {
+ double bad = testStatistic.anovaFValue(tooFew);
+ fail("less than two classes, IllegalArgumentException expected");
+ } catch (IllegalArgumentException ex) {
+ // expected
+ }
+ }
+
+
+ public void testAnovaPValue() throws Exception {
+ // Target comparison values computed using R version 2.6.0 (Linux
version)
+ List threeClasses = new ArrayList();
+ threeClasses.add(classA);
+ threeClasses.add(classB);
+ threeClasses.add(classC);
+
+ assertEquals("ANOVA P-value", 6.959446E-06,
+ testStatistic.anovaPValue(threeClasses), 1E-12);
+
+ List twoClasses = new ArrayList();
+ twoClasses.add(classA);
+ twoClasses.add(classB);
+
+ assertEquals("ANOVA P-value", 0.904212960464,
+ testStatistic.anovaPValue(twoClasses), 1E-12);
+
+ }
+
+ public void testAnovaTest() throws Exception {
+ // Target comparison values computed using R version 2.3.1 (Linux
version)
+ List threeClasses = new ArrayList();
+ threeClasses.add(classA);
+ threeClasses.add(classB);
+ threeClasses.add(classC);
+
+ assertTrue("ANOVA Test P<0.01", testStatistic.anovaTest(threeClasses,
0.01));
+
+ List twoClasses = new ArrayList();
+ twoClasses.add(classA);
+ twoClasses.add(classB);
+
+ assertFalse("ANOVA Test P>0.01", testStatistic.anovaTest(twoClasses,
0.01));
+ }
+
+}
\ No newline at end of file
Modified: commons/proper/math/trunk/xdocs/changes.xml
URL:
http://svn.apache.org/viewvc/commons/proper/math/trunk/xdocs/changes.xml?rev=615531&r1=615530&r2=615531&view=diff
==============================================================================
--- commons/proper/math/trunk/xdocs/changes.xml (original)
+++ commons/proper/math/trunk/xdocs/changes.xml Sat Jan 26 14:42:43 2008
@@ -129,7 +129,10 @@
</action>
<action dev="luc" type="update" issue="MATH-179" due-to="Niall
Pemberton">
Add tests for Fraction constructor using double parameter.
- </action>
+ </action>
+ <action dev="psteitz" type="update" issue="MATH-173" due-to="Bob
MacCallum">
+ Added one-way ANOVA implementation.
+ </action>
</release>
<release version="1.1" date="2005-12-17"
description="This is a maintenance release containing bug fixes and
enhancements.