psteitz 2004/05/23 22:29:05
Modified: math/src/java/org/apache/commons/math/stat/inference
TTest.java TTestImpl.java
Log:
Added support for paired t-tests.
PR #29049
Reported by: Joel Freyss
Revision Changes Path
1.4 +245 -151
jakarta-commons/math/src/java/org/apache/commons/math/stat/inference/TTest.java
Index: TTest.java
===================================================================
RCS file:
/home/cvs/jakarta-commons/math/src/java/org/apache/commons/math/stat/inference/TTest.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- TTest.java 23 May 2004 05:45:11 -0000 1.3
+++ TTest.java 24 May 2004 05:29:05 -0000 1.4
@@ -24,6 +24,100 @@
* @version $Revision$ $Date$
*/
public interface TTest {
+
+
+ /**
+ * Computes a paired, 2-sample t-statistic based on the data in the input
+ * arrays. The t-statistic returned is equivalent to what would be returned by
+ * computing the one-sample t-statistic {@link #t(double, double[])}, with
+ * <code>mu = 0</code> and the sample array consisting of the (signed)
+ * differences between corresponding entries in <code>sample1</code> and
+ * <code>sample2.</code>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input arrays must have the same length and their common length
+ * must be at least 2.
+ * </li></ul>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if the statistic cannot be computed due to a
+ * convergence or other numerical error.
+ */
+ double pairedT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Returns the <i>observed significance level</i>, or
+ * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
+ * p-value</a>, associated with a paired, two-sample, two-tailed t-test
+ * based on the data in the input arrays.
+ * <p>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean of the paired
+ * differences is 0 in favor of the two-sided alternative that the mean paired
+ * difference is not equal to 0. For a one-sided test, divide the returned
+ * value by 2.
+ * <p>
+ * This test is equivalent to a one-sample t-test computed using
+ * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample array
+ * consisting of the signed differences between corresponding elements of
+ * <code>sample1</code> and <code>sample2.</code>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input array lengths must be the same and their common length must
+ * be at least 2.
+ * </li></ul>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ double pairedTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException;
+
+ /**
+ * Performs a paired t-test evaluating the null hypothesis that the
+ * mean of the paired differences between <code>sample1</code> and
+ * <code>sample2</code> is 0 in favor of the two-sided alternative that the
+ * mean paired difference is not equal to 0, with significance level
+ * <code>alpha</code>.
+ * <p>
+ * Returns <code>true</code> iff the null hypothesis can be rejected with
+ * confidence <code>1 - alpha</code>. To perform a 1-sided test, use
+ * <code>alpha / 2</code>
+ * <p>
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li>The input array lengths must be the same and their common length must be
at least 2.
+ * </li>
+ * <li> <code> 0 < alpha < 0.5 </code>
+ * </li></ul>
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
+ */
+ boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
+ throws IllegalArgumentException, MathException;
+
/**
* Computes a <a
href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
* t statistic </a> given observed values and a comparison constant.
@@ -43,6 +137,25 @@
throws IllegalArgumentException;
/**
+ * Computes a <a
href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
+ * t statistic </a> to use in comparing the dataset described by
<code>sampleStats</code>
+ * to <code>mu</code>.
+ * <p>
+ * This statistic can be used to perform a one sample t-test for the mean.
+ * <p>
+ * <strong>Preconditions</strong>: <ul>
+ * <li><code>sampleStats.getN() >= 2</code>.
+ * </li></ul>
+ *
+ * @param mu comparison constant
+ * @param sampleStats StatisticalSummary holding sample summary statistics
+ * @return t statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ double t(double mu, StatisticalSummary sampleStats)
+ throws IllegalArgumentException;
+
+ /**
* Computes a <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
* 2-sample t statistic </a>, without the assumption of equal sample variances.
* <p>
@@ -64,63 +177,37 @@
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
- * p-value</a>, associated with a two-sample, two-tailed t-test
- * comparing the means of the input arrays.
- * <p>
- * The number returned is the smallest significance level
- * at which one can reject the null hypothesis that the two means are
- * equal in favor of the two-sided alternative that they are different.
- * For a one-sided test, divide the returned value by 2.
- * <p>
- * The test does not assume that the underlying popuation variances are
- * equal and it uses approximated degrees of freedom computed from the
- * sample data as described
- * <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
+ * Computes a <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
+ * 2-sample t statistic </a>, comparing the means of the datasets described
+ * by two {@link StatisticalSummary} instances without the assumption of equal sample variances.
* <p>
- * <strong>Usage Note:</strong><br>
- * The validity of the p-value depends on the assumptions of the parametric
- * t-test procedure, as discussed
- * <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
+ * This statistic can be used to perform a two-sample t-test to compare
+ * sample means.
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The observed array lengths must both be at least 2.
+ * <li>The datasets described by the two Univariates must each contain
+ * at least 2 observations.
* </li></ul>
*
- * @param sample1 array of sample data values
- * @param sample2 array of sample data values
- * @return p-value for t-test
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return t statistic
* @throws IllegalArgumentException if the precondition is not met
- * @throws MathException if an error occurs computing the p-value
*/
- double tTest(double[] sample1, double[] sample2)
- throws IllegalArgumentException, MathException;
+ double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
+ throws IllegalArgumentException;
/**
- * Performs a <a
href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that
<code>sample1</code>
- * and <code>sample2</code> are drawn from populations with the same mean,
- * with significance level <code>alpha</code>.
- * <p>
- * Returns <code>true</code> iff the null hypothesis that the means are
- * equal can be rejected with confidence <code>1 - alpha</code>. To
- * perform a 1-sided test, use <code>alpha / 2</code>
- * <p>
- * <strong>Examples:</strong><br><ol>
- * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
- * the 95% level, use <br><code>tTest(sample1, sample2, 0.05) </code>
- * </li>
- * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
- * at the 99% level, first verify that the measured mean of
- * <code>sample 1</code> is less than the mean of <code>sample 2</code>
- * and then use <br><code>tTest(sample1, sample2, 0.005) </code>
- * </li></ol>
+ * Returns the <i>observed significance level</i>, or
+ * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
+ * p-value</a>, associated with a one-sample, two-tailed t-test
+ * comparing the mean of the input array with the constant <code>mu</code>.
* <p>
- * The test does not assume that the underlying popuation variances are
- * equal and it uses approximated degrees of freedom computed from the
- * sample data as described
- * <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the mean equals
+ * <code>mu</code> in favor of the two-sided alternative that the mean
+ * is different from <code>mu</code>. For a one-sided test, divide the
+ * returned value by 2.
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
@@ -128,20 +215,16 @@
* <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The observed array lengths must both be at least 2.
- * </li>
- * <li> <code> 0 < alpha < 0.5 </code>
+ * <li>The observed array length must be at least 2.
* </li></ul>
*
- * @param sample1 array of sample data values
- * @param sample2 array of sample data values
- * @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
- * confidence 1 - alpha
- * @throws IllegalArgumentException if the preconditions are not met
- * @throws MathException if an error occurs performing the test
+ * @param mu constant value to compare sample mean against
+ * @param sample array of sample data values
+ * @return p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
*/
- boolean tTest(double[] sample1, double[] sample2, double alpha)
+ double tTest(double mu, double[] sample)
throws IllegalArgumentException, MathException;
/**
@@ -186,7 +269,8 @@
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a one-sample, two-tailed t-test
- * comparing the mean of the input array with the constant <code>mu</code>.
+ * comparing the mean of the dataset described by <code>sampleStats</code>
+ * with the constant <code>mu</code>.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the mean equals
@@ -200,63 +284,61 @@
* <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The observed array length must be at least 2.
+ * <li>The sample must contain at least 2 observations.
* </li></ul>
*
* @param mu constant value to compare sample mean against
- * @param sample array of sample data values
+ * @param sampleStats StatisticalSummary describing sample data
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
- double tTest(double mu, double[] sample)
+ double tTest(double mu, StatisticalSummary sampleStats)
throws IllegalArgumentException, MathException;
/**
- * Computes a <a
href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
- * t statistic </a> to use in comparing the dataset described by
<code>sampleStats</code>
- * to <code>mu</code>.
+ * Performs a <a
href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that the mean of the
population from
+ * which the dataset described by <code>stats</code> is drawn equals
<code>mu</code>.
* <p>
- * This statistic can be used to perform a one sample t-test for the mean.
+ * Returns <code>true</code> iff the null hypothesis can be
+ * rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha / 2</code>
* <p>
- * <strong>Preconditions</strong>: <ul>
- * <li><code>observed.getN() > = 2</code>.
- * </li></ul>
- *
- * @param mu comparison constant
- * @param sampleStats DescriptiveStatistics holding sample summary statitstics
- * @return t statistic
- * @throws IllegalArgumentException if the precondition is not met
- */
- double t(double mu, StatisticalSummary sampleStats)
- throws IllegalArgumentException;
-
- /**
- * Computes a <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
- * 2-sample t statistic </a>, comparing the means of the datasets described
- * by two [EMAIL PROTECTED] StatisticalSummary} instances without the
assumption of equal sample variances.
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
+ * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
+ * at the 99% level, first verify that the measured sample mean is less
+ * than <code>mu</code> and then use
+ * <br><code>tTest(mu, sampleStats, 0.005) </code>
+ * </li></ol>
* <p>
- * This statistic can be used to perform a two-sample t-test to compare
- * sample means.
+ * <strong>Usage Note:</strong><br>
+ * The validity of the test depends on the assumptions of the one-sample
+ * parametric t-test procedure, as discussed
+ * <a
href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The datasets described by the two Univariates must each contain
- * at least 2 observations.
+ * <li>The sample must include at least 2 observations.
* </li></ul>
*
- * @param sampleStats1 StatisticalSummary describing data from the first sample
- * @param sampleStats2 StatisticalSummary describing data from the second sample
- * @return t statistic
+ * @param mu constant value to compare sample mean against
+ * @param sampleStats StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true iff the null hypothesis can be rejected with confidence 1 - alpha
* @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
*/
- double t(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
- throws IllegalArgumentException;
+ boolean tTest(double mu, StatisticalSummary sampleStats, double alpha)
+ throws IllegalArgumentException, MathException;
/**
* Returns the <i>observed significance level</i>, or
* <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
* p-value</a>, associated with a two-sample, two-tailed t-test
- * comparing the means of the datasets described by two Univariates.
+ * comparing the means of the input arrays.
* <p>
* The number returned is the smallest significance level
* at which one can reject the null hypothesis that the two means are
@@ -274,24 +356,23 @@
* <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The datasets described by the two Univariates must each contain
- * at least 2 observations.
+ * <li>The observed array lengths must both be at least 2.
* </li></ul>
*
- * @param sampleStats1 StatisticalSummary describing data from the first sample
- * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
* @return p-value for t-test
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
- double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
+ double tTest(double[] sample1, double[] sample2)
throws IllegalArgumentException, MathException;
/**
* Performs a <a
href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that
<code>sampleStats1</code>
- * and <code>sampleStats2</code> describe datasets drawn from populations with
the
- * same mean, with significance level <code>alpha</code>.
+ * two-sided t-test</a> evaluating the null hypothesis that
<code>sample1</code>
+ * and <code>sample2</code> are drawn from populations with the same mean,
+ * with significance level <code>alpha</code>.
* <p>
* Returns <code>true</code> iff the null hypothesis that the means are
* equal can be rejected with confidence <code>1 - alpha</code>. To
@@ -299,13 +380,12 @@
* <p>
* <strong>Examples:</strong><br><ol>
* <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
- * the 95% level, use
- * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
+ * the 95% level, use <br><code>tTest(sample1, sample2, 0.05) </code>
* </li>
* <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
* at the 99% level, first verify that the measured mean of
* <code>sample 1</code> is less than the mean of <code>sample 2</code>
- * and then use <br><code>tTest(sampleStats1, sampleStats2, 0.005) </code>
+ * and then use <br><code>tTest(sample1, sample2, 0.005) </code>
* </li></ol>
* <p>
* The test does not assume that the underlying popuation variances are
@@ -319,74 +399,82 @@
* <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The datasets described by the two Univariates must each contain
- * at least 2 observations.
+ * <li>The observed array lengths must both be at least 2.
* </li>
* <li> <code> 0 < alpha < 0.5 </code>
* </li></ul>
*
- * @param sampleStats1 StatisticalSummary describing sample data values
- * @param sampleStats2 StatisticalSummary describing sample data values
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
* @param alpha significance level of the test
* @return true if the null hypothesis can be rejected with
* confidence 1 - alpha
* @throws IllegalArgumentException if the preconditions are not met
* @throws MathException if an error occurs performing the test
*/
- boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
- double alpha)
+ boolean tTest(double[] sample1, double[] sample2, double alpha)
throws IllegalArgumentException, MathException;
/**
- * Performs a <a
href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
- * two-sided t-test</a> evaluating the null hypothesis that the mean of the
population from
- * which the dataset described by <code>stats</code> is drawn equals
<code>mu</code>.
+ * Returns the <i>observed significance level</i>, or
+ * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
+ * p-value</a>, associated with a two-sample, two-tailed t-test
+ * comparing the means of the datasets described by two Univariates.
* <p>
- * Returns <code>true</code> iff the null hypothesis can be
- * rejected with confidence <code>1 - alpha</code>. To
- * perform a 1-sided test, use <code>alpha / 2</code>
+ * The number returned is the smallest significance level
+ * at which one can reject the null hypothesis that the two means are
+ * equal in favor of the two-sided alternative that they are different.
+ * For a one-sided test, divide the returned value by 2.
* <p>
- * <strong>Examples:</strong><br><ol>
- * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
- * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
- * </li>
- * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
- * at the 99% level, first verify that the measured sample mean is less
- * than <code>mu</code> and then use
- * <br><code>tTest(mu, sampleStats, 0.005) </code>
- * </li></ol>
+ * The test does not assume that the underlying population variances are
+ * equal and it uses approximated degrees of freedom computed from the
+ * sample data as described
+ * <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
* <p>
* <strong>Usage Note:</strong><br>
- * The validity of the test depends on the assumptions of the one-sample
- * parametric t-test procedure, as discussed
- * <a
href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
+ * The validity of the p-value depends on the assumptions of the parametric
+ * t-test procedure, as discussed
+ * <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The sample must include at least 2 observations.
+ * <li>The datasets described by the two Univariates must each contain
+ * at least 2 observations.
* </li></ul>
*
- * @param mu constant value to compare sample mean against
- * @param sampleStats StatisticalSummary describing sample data values
- * @param alpha significance level of the test
- * @return p-value
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return p-value for t-test
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
- boolean tTest(double mu, StatisticalSummary sampleStats, double alpha)
+ double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
throws IllegalArgumentException, MathException;
/**
- * Returns the <i>observed significance level</i>, or
- * <a href="http://www.cas.lancs.ac.uk/glossary_v1.1/hyptest.html#pvalue">
- * p-value</a>, associated with a one-sample, two-tailed t-test
- * comparing the mean of the dataset described by <code>sampleStats</code>
- * with the constant <code>mu</code>.
+ * Performs a <a
href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
+ * two-sided t-test</a> evaluating the null hypothesis that
<code>sampleStats1</code>
+ * and <code>sampleStats2</code> describe datasets drawn from populations with
the
+ * same mean, with significance level <code>alpha</code>.
* <p>
- * The number returned is the smallest significance level
- * at which one can reject the null hypothesis that the mean equals
- * <code>mu</code> in favor of the two-sided alternative that the mean
- * is different from <code>mu</code>. For a one-sided test, divide the
- * returned value by 2.
+ * Returns <code>true</code> iff the null hypothesis that the means are
+ * equal can be rejected with confidence <code>1 - alpha</code>. To
+ * perform a 1-sided test, use <code>alpha / 2</code>
+ * <p>
+ * <strong>Examples:</strong><br><ol>
+ * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
+ * the 95% level, use
+ * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
+ * </li>
+ * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
+ * at the 99% level, first verify that the measured mean of
+ * <code>sample 1</code> is less than the mean of <code>sample 2</code>
+ * and then use <br><code>tTest(sampleStats1, sampleStats2, 0.005) </code>
+ * </li></ol>
+ * <p>
+ * The test does not assume that the underlying population variances are
+ * equal and it uses approximated degrees of freedom computed from the
+ * sample data as described
+ * <a
href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">here</a>
* <p>
* <strong>Usage Note:</strong><br>
* The validity of the test depends on the assumptions of the parametric
@@ -394,15 +482,21 @@
* <a
href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
* <p>
* <strong>Preconditions</strong>: <ul>
- * <li>The sample must contain at least 2 observations.
+ * <li>The datasets described by the two Univariates must each contain
+ * at least 2 observations.
+ * </li>
+ * <li> <code> 0 < alpha < 0.5 </code>
* </li></ul>
*
- * @param mu constant value to compare sample mean against
- * @param sampleStats StatisticalSummary describing sample data
- * @return p-value
- * @throws IllegalArgumentException if the precondition is not met
- * @throws MathException if an error occurs computing the p-value
+ * @param sampleStats1 StatisticalSummary describing sample data values
+ * @param sampleStats2 StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
*/
- double tTest(double mu, StatisticalSummary sampleStats)
+ boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2,
+ double alpha)
throws IllegalArgumentException, MathException;
}
1.3 +200 -161
jakarta-commons/math/src/java/org/apache/commons/math/stat/inference/TTestImpl.java
Index: TTestImpl.java
===================================================================
RCS file:
/home/cvs/jakarta-commons/math/src/java/org/apache/commons/math/stat/inference/TTestImpl.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- TTestImpl.java 23 May 2004 05:04:48 -0000 1.2
+++ TTestImpl.java 24 May 2004 05:29:05 -0000 1.3
@@ -37,100 +37,101 @@
super();
}
- /**
- * @param mu comparison constant
- * @param observed array of values
- * @return t statistic
- * @throws IllegalArgumentException if input array length is less than 2
- */
- public double t(double mu, double[] observed)
- throws IllegalArgumentException {
- if ((observed == null) || (observed.length < 2)) {
- throw new IllegalArgumentException("insufficient data for t statistic");
- }
- return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
observed.length);
- }
+ //----------------------------------------------- Protected methods
/**
- * @param mu constant value to compare sample mean against
- * @param sample array of sample data values
- * @param alpha significance level of the test
- * @return p-value
- * @throws IllegalArgumentException if the precondition is not met
- * @throws MathException if an error occurs computing the p-value
+ * Computes approximate degrees of freedom for 2-sample t-test.
+ *
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return approximate degrees of freedom
*/
- public boolean tTest(double mu, double[] sample, double alpha)
- throws IllegalArgumentException, MathException {
- if ((alpha <= 0) || (alpha > 0.5)) {
- throw new IllegalArgumentException("bad significance level: " + alpha);
- }
- return (tTest(mu, sample) < alpha);
+ protected double df(double v1, double v2, double n1, double n2) {
+ return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
+ ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
+ (n2 * n2 * (n2 - 1d)));
}
-
- /**
- * @param sample1 array of sample data values
- * @param sample2 array of sample data values
- * @return t-statistic
- * @throws IllegalArgumentException if the precondition is not met
+
+ /* (non-Javadoc)
+ * @see org.apache.commons.math.stat.inference.TTest#pairedT(double[], double[])
*/
- public double t(double[] sample1, double[] sample2)
- throws IllegalArgumentException {
+ public double pairedT(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
if ((sample1 == null) || (sample2 == null ||
Math.min(sample1.length, sample2.length) < 2)) {
throw new IllegalArgumentException("insufficient data for t statistic");
}
- return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
StatUtils.variance(sample1),
- StatUtils.variance(sample2), (double) sample1.length, (double)
sample2.length);
+ double meanDifference = StatUtils.meanDifference(sample1, sample2);
+ return t(meanDifference, 0,
+ StatUtils.varianceDifference(sample1, sample2, meanDifference),
+ (double) sample1.length);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[],
double[])
+ */
+ public double pairedTTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ double meanDifference = StatUtils.meanDifference(sample1, sample2);
+ return tTest(meanDifference, 0,
+ StatUtils.varianceDifference(sample1, sample2, meanDifference),
+ (double) sample1.length);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.commons.math.stat.inference.TTest#pairedTTest(double[],
double[], double)
+ */
+ public boolean pairedTTest(
+ double[] sample1,
+ double[] sample2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ // TODO Auto-generated method stub
+ return false;
}
/**
- *
- * @param sample1 array of sample data values
- * @param sample2 array of sample data values
- * @return tTest p-value
- * @throws IllegalArgumentException if the precondition is not met
- * @throws MathException if an error occurs computing the p-value
+ * Computes t test statistic for 1-sample t-test.
+ *
+ * @param m sample mean
+ * @param mu constant to test against
+ * @param v sample variance
+ * @param n sample n
+ * @return t test statistic
*/
- public double tTest(double[] sample1, double[] sample2)
- throws IllegalArgumentException, MathException {
- if ((sample1 == null) || (sample2 == null ||
- Math.min(sample1.length, sample2.length) < 2)) {
- throw new IllegalArgumentException("insufficient data");
- }
- return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
StatUtils.variance(sample1),
- StatUtils.variance(sample2), (double) sample1.length, (double)
sample2.length);
+ protected double t(double m, double mu, double v, double n) {
+ return (m - mu) / Math.sqrt(v / n);
}
/**
- * @param sample1 array of sample data values
- * @param sample2 array of sample data values
- * @param alpha significance level
- * @return true if the null hypothesis can be rejected with
- * confidence 1 - alpha
- * @throws IllegalArgumentException if the preconditions are not met
- * @throws MathException if an error occurs performing the test
+ * Computes t test statistic for 2-sample t-test.
+ *
+ * @param m1 first sample mean
+ * @param m2 second sample mean
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return t test statistic
*/
- public boolean tTest(double[] sample1, double[] sample2, double alpha)
- throws IllegalArgumentException, MathException {
- if ((alpha <= 0) || (alpha > 0.5)) {
- throw new IllegalArgumentException("bad significance level: " + alpha);
- }
- return (tTest(sample1, sample2) < alpha);
+ protected double t(double m1, double m2, double v1, double v2, double
n1,double n2) {
+ return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
}
/**
- * @param mu constant value to compare sample mean against
- * @param sample array of sample data values
- * @return p-value
- * @throws IllegalArgumentException if the precondition is not met
- * @throws MathException if an error occurs computing the p-value
+ * @param mu comparison constant
+ * @param observed array of values
+ * @return t statistic
+ * @throws IllegalArgumentException if input array length is less than 2
*/
- public double tTest(double mu, double[] sample)
- throws IllegalArgumentException, MathException {
- if ((sample == null) || (sample.length < 2)) {
+ public double t(double mu, double[] observed)
+ throws IllegalArgumentException {
+ if ((observed == null) || (observed.length < 2)) {
throw new IllegalArgumentException("insufficient data for t statistic");
}
- return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample),
sample.length);
+ return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
observed.length);
}
/**
@@ -148,6 +149,22 @@
}
/**
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return t-statistic
+ * @throws IllegalArgumentException if the precondition is not met
+ */
+ public double t(double[] sample1, double[] sample2)
+ throws IllegalArgumentException {
+ if ((sample1 == null) || (sample2 == null ||
+ Math.min(sample1.length, sample2.length) < 2)) {
+ throw new IllegalArgumentException("insufficient data for t statistic");
+ }
+ return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
StatUtils.variance(sample1),
+ StatUtils.variance(sample2), (double) sample1.length, (double)
sample2.length);
+ }
+
+ /**
* @param sampleStats1 StatisticalSummary describing data from the first sample
* @param sampleStats2 StatisticalSummary describing data from the second sample
* @return t statistic
@@ -165,54 +182,72 @@
}
/**
- * @param sampleStats1 StatisticalSummary describing data from the first sample
- * @param sampleStats2 StatisticalSummary describing data from the second sample
- * @return p-value for t-test
- * @throws IllegalArgumentException if the precondition is not met
+ * Computes p-value for 2-sided, 1-sample t-test.
+ *
+ * @param m sample mean
+ * @param mu constant to test against
+ * @param v sample variance
+ * @param n sample n
+ * @return p-value
* @throws MathException if an error occurs computing the p-value
*/
- public double tTest(StatisticalSummary sampleStats1, StatisticalSummary
sampleStats2)
- throws IllegalArgumentException, MathException {
- if ((sampleStats1 == null) || (sampleStats2 == null ||
- Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
- throw new IllegalArgumentException("insufficient data for t statistic");
- }
- return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
sampleStats1.getVariance(),
- sampleStats2.getVariance(), (double) sampleStats1.getN(), (double)
sampleStats2.getN());
+ protected double tTest(double m, double mu, double v, double n)
+ throws MathException {
+ double t = Math.abs(t(m, mu, v, n));
+ TDistribution tDistribution =
+ DistributionFactory.newInstance().createTDistribution(n - 1);
+ return 1.0 - tDistribution.cumulativeProbability(-t, t);
}
/**
- * @param sampleStats1 StatisticalSummary describing sample data values
- * @param sampleStats2 StatisticalSummary describing sample data values
- * @param alpha significance level of the test
- * @return true if the null hypothesis can be rejected with
- * confidence 1 - alpha
- * @throws IllegalArgumentException if the preconditions are not met
- * @throws MathException if an error occurs performing the test
+ * Computes p-value for 2-sided, 2-sample t-test.
+ *
+ * @param m1 first sample mean
+ * @param m2 second sample mean
+ * @param v1 first sample variance
+ * @param v2 second sample variance
+ * @param n1 first sample n
+ * @param n2 second sample n
+ * @return p-value
+ * @throws MathException if an error occurs computing the p-value
*/
- public boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary
sampleStats2,
- double alpha)
+ protected double tTest(double m1, double m2, double v1, double v2, double n1,
double n2)
+ throws MathException {
+ double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
+ TDistribution tDistribution =
+ DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1,
n2));
+ return 1.0 - tDistribution.cumulativeProbability(-t, t);
+ }
+
+ /**
+ * @param mu constant value to compare sample mean against
+ * @param sample array of sample data values
+ * @return p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
+ */
+ public double tTest(double mu, double[] sample)
throws IllegalArgumentException, MathException {
- if ((alpha <= 0) || (alpha > 0.5)) {
- throw new IllegalArgumentException("bad significance level: " + alpha);
+ if ((sample == null) || (sample.length < 2)) {
+ throw new IllegalArgumentException("insufficient data for t statistic");
}
- return (tTest(sampleStats1, sampleStats2) < alpha);
+ return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample),
sample.length);
}
/**
* @param mu constant value to compare sample mean against
- * @param sampleStats StatisticalSummary describing sample data values
+ * @param sample array of sample data values
* @param alpha significance level of the test
* @return p-value
* @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
- public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha)
+ public boolean tTest(double mu, double[] sample, double alpha)
throws IllegalArgumentException, MathException {
if ((alpha <= 0) || (alpha > 0.5)) {
throw new IllegalArgumentException("bad significance level: " + alpha);
}
- return (tTest(mu, sampleStats) < alpha);
+ return (tTest(mu, sample) < alpha);
}
/**
@@ -230,86 +265,90 @@
return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
sampleStats.getN());
}
- //----------------------------------------------- Protected methods
-
/**
- * Computes approximate degrees of freedom for 2-sample t-test.
- *
- * @param v1 first sample variance
- * @param v2 second sample variance
- * @param n1 first sample n
- * @param n2 second sample n
- * @return approximate degrees of freedom
+ * @param mu constant value to compare sample mean against
+ * @param sampleStats StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with confidence 1 - alpha
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
*/
- protected double df(double v1, double v2, double n1, double n2) {
- return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
- ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
- (n2 * n2 * (n2 - 1d)));
+ public boolean tTest( double mu, StatisticalSummary sampleStats,double alpha)
+ throws IllegalArgumentException, MathException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw new IllegalArgumentException("bad significance level: " + alpha);
+ }
+ return (tTest(mu, sampleStats) < alpha);
}
/**
- * Computes t test statistic for 2-sample t-test.
- *
- * @param m1 first sample mean
- * @param m2 second sample mean
- * @param v1 first sample variance
- * @param v2 second sample variance
- * @param n1 first sample n
- * @param n2 second sample n
- * @return t test statistic
+ *
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @return tTest p-value
+ * @throws IllegalArgumentException if the precondition is not met
+ * @throws MathException if an error occurs computing the p-value
*/
- protected double t(double m1, double m2, double v1, double v2, double
n1,double n2) {
- return (m1 - m2) / Math.sqrt((v1 / n1) + (v2 / n2));
+ public double tTest(double[] sample1, double[] sample2)
+ throws IllegalArgumentException, MathException {
+ if ((sample1 == null) || (sample2 == null ||
+ Math.min(sample1.length, sample2.length) < 2)) {
+ throw new IllegalArgumentException("insufficient data");
+ }
+ return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
StatUtils.variance(sample1),
+ StatUtils.variance(sample2), (double) sample1.length, (double)
sample2.length);
}
/**
- * Computes t test statistic for 1-sample t-test.
- *
- * @param m sample mean
- * @param mu constant to test against
- * @param v sample variance
- * @param n sample n
- * @return t test statistic
+ * @param sample1 array of sample data values
+ * @param sample2 array of sample data values
+ * @param alpha significance level
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
*/
- protected double t(double m, double mu, double v, double n) {
- return (m - mu) / Math.sqrt(v / n);
+ public boolean tTest(double[] sample1, double[] sample2, double alpha)
+ throws IllegalArgumentException, MathException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw new IllegalArgumentException("bad significance level: " + alpha);
+ }
+ return (tTest(sample1, sample2) < alpha);
}
/**
- * Computes p-value for 2-sided, 2-sample t-test.
- *
- * @param m1 first sample mean
- * @param m2 second sample mean
- * @param v1 first sample variance
- * @param v2 second sample variance
- * @param n1 first sample n
- * @param n2 second sample n
- * @return p-value
+ * @param sampleStats1 StatisticalSummary describing data from the first sample
+ * @param sampleStats2 StatisticalSummary describing data from the second sample
+ * @return p-value for t-test
+ * @throws IllegalArgumentException if the precondition is not met
* @throws MathException if an error occurs computing the p-value
*/
- protected double tTest(double m1, double m2, double v1, double v2, double n1,
double n2)
- throws MathException {
- double t = Math.abs(t(m1, m2, v1, v2, n1, n2));
- TDistribution tDistribution =
- DistributionFactory.newInstance().createTDistribution(df(v1, v2, n1,
n2));
- return 1.0 - tDistribution.cumulativeProbability(-t, t);
+ public double tTest(StatisticalSummary sampleStats1, StatisticalSummary
sampleStats2)
+ throws IllegalArgumentException, MathException {
+ if ((sampleStats1 == null) || (sampleStats2 == null ||
+ Math.min(sampleStats1.getN(), sampleStats2.getN()) < 2)) {
+ throw new IllegalArgumentException("insufficient data for t statistic");
+ }
+ return tTest(sampleStats1.getMean(), sampleStats2.getMean(),
sampleStats1.getVariance(),
+ sampleStats2.getVariance(), (double) sampleStats1.getN(), (double)
sampleStats2.getN());
}
/**
- * Computes p-value for 2-sided, 1-sample t-test.
- *
- * @param m sample mean
- * @param mu constant to test against
- * @param v sample variance
- * @param n sample n
- * @return p-value
- * @throws MathException if an error occurs computing the p-value
+ * @param sampleStats1 StatisticalSummary describing sample data values
+ * @param sampleStats2 StatisticalSummary describing sample data values
+ * @param alpha significance level of the test
+ * @return true if the null hypothesis can be rejected with
+ * confidence 1 - alpha
+ * @throws IllegalArgumentException if the preconditions are not met
+ * @throws MathException if an error occurs performing the test
*/
- protected double tTest(double m, double mu, double v, double n)
- throws MathException {
- double t = Math.abs(t(m, mu, v, n));
- TDistribution tDistribution =
- DistributionFactory.newInstance().createTDistribution(n - 1);
- return 1.0 - tDistribution.cumulativeProbability(-t, t);
+ public boolean tTest(StatisticalSummary sampleStats1, StatisticalSummary
sampleStats2,
+ double alpha)
+ throws IllegalArgumentException, MathException {
+ if ((alpha <= 0) || (alpha > 0.5)) {
+ throw new IllegalArgumentException("bad significance level: " + alpha);
+ }
+ return (tTest(sampleStats1, sampleStats2) < alpha);
}
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]