psteitz 2004/09/01 08:19:32
Added: math/src/java/org/apache/commons/math/stat/multivariate
SimpleRegression.java
math/src/test/org/apache/commons/math/stat/multivariate
SimpleRegressionTest.java
Removed: math/src/java/org/apache/commons/math/stat/multivariate
BivariateRegression.java
math/src/test/org/apache/commons/math/stat/multivariate
BivariateRegressionTest.java
Log:
Renamed BivariateRegression to SimpleRegression.
Revision Changes Path
1.1
jakarta-commons/math/src/java/org/apache/commons/math/stat/multivariate/SimpleRegression.java
Index: SimpleRegression.java
===================================================================
/*
* Copyright 2003-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.multivariate;
import java.io.Serializable;
import org.apache.commons.math.MathException;
import org.apache.commons.math.distribution.DistributionFactory;
import org.apache.commons.math.distribution.TDistribution;
/**
 * Estimates an ordinary least squares regression model
 * with one independent variable.
 * <p>
 * <code> y = intercept + slope * x </code>
 * <p>
 * Standard errors for <code>intercept</code> and <code>slope</code> are
 * available as well as ANOVA, r-square and Pearson's r statistics.
 * <p>
 * Observations (x,y pairs) can be added to the model one at a time or they
 * can be provided in a 2-dimensional array.  The observations are not stored
 * in memory, so there is no limit to the number of observations that can be
 * added to the model.
 * <p>
 * <strong>Usage Notes</strong>: <ul>
 * <li> When there are fewer than two observations in the model, or when
 * there is no variation in the x values (i.e. all x values are the same)
 * all statistics return <code>NaN</code>. At least two observations with
 * different x coordinates are required to estimate a bivariate regression
 * model.
 * </li>
 * <li> getters for the statistics always compute values based on the current
 * set of observations -- i.e., you can get statistics, then add more data
 * and get updated statistics without using a new instance.  There is no
 * "compute" method that updates all statistics.  Each of the getters performs
 * the necessary computations to return the requested statistic.</li>
 * </ul>
 *
 * @version $Revision: 1.1 $ $Date: 2004/09/01 15:19:32 $
 */
public class SimpleRegression implements Serializable {

    /** Serializable version identifier */
    private static final long serialVersionUID = -3004689053607543335L;

    /** sum of x values */
    private double sumX = 0d;

    /** total variation in x (sum of squared deviations from xbar) */
    private double sumXX = 0d;

    /** sum of y values */
    private double sumY = 0d;

    /** total variation in y (sum of squared deviations from ybar) */
    private double sumYY = 0d;

    /** sum of cross products of deviations (x - xbar)(y - ybar) */
    private double sumXY = 0d;

    /** number of observations */
    private long n = 0;

    /** mean of accumulated x values, used in updating formulas */
    private double xbar = 0;

    /** mean of accumulated y values, used in updating formulas */
    private double ybar = 0;

    // ---------------------Public methods------------------------------------

    /**
     * Create an empty SimpleRegression instance
     */
    public SimpleRegression() {
        super();
    }

    /**
     * Adds the observation (x,y) to the regression data set.
     * <p>
     * Uses updating formulas for means and sums of squares defined in
     * "Algorithms for Computing the Sample Variance: Analysis and
     * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.
     * 1983, American Statistician, vol. 37, pp. 242-247, referenced in
     * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985
     *
     * @param x independent variable value
     * @param y dependent variable value
     */
    public void addData(double x, double y) {
        if (n == 0) {
            // First observation: running means are just the observed values.
            xbar = x;
            ybar = y;
        } else {
            // Chan/Golub/LeVeque updating formulas: fold the new point into
            // the sums of squared deviations and the running means without
            // storing the data.
            double dx = x - xbar;
            double dy = y - ybar;
            sumXX += dx * dx * (double) n / (n + 1.0);
            sumYY += dy * dy * (double) n / (n + 1.0);
            sumXY += dx * dy * (double) n / (n + 1.0);
            xbar += dx / (n + 1.0);
            ybar += dy / (n + 1.0);
        }
        sumX += x;
        sumY += y;
        n++;
    }

    /**
     * Adds the observations represented by the elements in
     * <code>data</code>.
     * <p>
     * <code>(data[0][0],data[0][1])</code> will be the first observation, then
     * <code>(data[1][0],data[1][1])</code>, etc.
     * <p>
     * This method does not replace data that has already been added.  The
     * observations represented by <code>data</code> are added to the existing
     * dataset.
     * <p>
     * To replace all data, use <code>clear()</code> before adding the new
     * data.
     *
     * @param data array of observations to be added
     */
    public void addData(double[][] data) {
        for (int i = 0; i < data.length; i++) {
            addData(data[i][0], data[i][1]);
        }
    }

    /**
     * Clears all data from the model.
     */
    public void clear() {
        sumX = 0d;
        sumXX = 0d;
        sumY = 0d;
        sumYY = 0d;
        sumXY = 0d;
        n = 0;
        // Reset the running means as well, so a cleared instance is
        // indistinguishable from a freshly constructed one.
        xbar = 0d;
        ybar = 0d;
    }

    /**
     * Returns the number of observations that have been added to the model.
     *
     * @return n number of observations that have been added.
     */
    public long getN() {
        return n;
    }

    /**
     * Returns the "predicted" <code>y</code> value associated with the
     * supplied <code>x</code> value,  based on the data that has been
     * added to the model when this method is activated.
     * <p>
     * <code> predict(x) = intercept + slope * x </code>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @param x input <code>x</code> value
     * @return predicted <code>y</code> value
     */
    public double predict(double x) {
        double b1 = getSlope();
        return getIntercept(b1) + b1 * x;
    }

    /**
     * Returns the intercept of the estimated regression line.
     * <p>
     * The least squares estimate of the intercept is computed using the
     * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
     * The intercept is sometimes denoted b0.
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return the intercept of the regression line
     */
    public double getIntercept() {
        return getIntercept(getSlope());
    }

    /**
     * Returns the slope of the estimated regression line.
     * <p>
     * The least squares estimate of the slope is computed using the
     * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
     * The slope is sometimes denoted b1.
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return the slope of the regression line
     */
    public double getSlope() {
        if (n < 2) {
            return Double.NaN; //not enough data
        }
        if (Math.abs(sumXX) < 10 * Double.MIN_VALUE) {
            return Double.NaN; //not enough variation in x
        }
        return sumXY / sumXX;
    }

    /**
     * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
     * sum of squared errors</a> (SSE) associated with the regression
     * model.
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return sum of squared errors associated with the regression model
     */
    public double getSumSquaredErrors() {
        return getSumSquaredErrors(getSlope());
    }

    /**
     * Returns the sum of squared deviations of the y values about their mean.
     * <p>
     * This is defined as SSTO
     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.
     * <p>
     * If <code>n < 2</code>, this returns <code>Double.NaN</code>.
     *
     * @return sum of squared deviations of y values
     */
    public double getTotalSumSquares() {
        if (n < 2) {
            return Double.NaN;
        }
        return sumYY;
    }

    /**
     * Returns the sum of squared deviations of the predicted y values about
     * their mean (which equals the mean of y).
     * <p>
     * This is usually abbreviated SSR or SSM.  It is defined as SSM
     * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return sum of squared deviations of predicted y values
     */
    public double getRegressionSumSquares() {
        return getRegressionSumSquares(getSlope());
    }

    /**
     * Returns the sum of squared errors divided by the degrees of freedom,
     * usually abbreviated MSE.
     * <p>
     * If there are fewer than <strong>three</strong> data pairs in the model,
     * or if there is no variation in <code>x</code>, this returns
     * <code>Double.NaN</code>.
     *
     * @return sum of squared deviations of y values
     */
    public double getMeanSquareError() {
        if (n < 3) {
            return Double.NaN;
        }
        // n - 2 degrees of freedom: two parameters (slope, intercept)
        // are estimated from the data.
        return getSumSquaredErrors() / (double) (n - 2);
    }

    /**
     * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">
     * Pearson's product moment correlation coefficient</a>,
     * usually denoted r.
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return Pearson's r
     */
    public double getR() {
        double b1 = getSlope();
        double result = Math.sqrt(getRSquare(b1));
        // r carries the sign of the slope.
        if (b1 < 0) {
            result = -result;
        }
        return result;
    }

    /**
     * Returns the <a href="http://www.xycoon.com/coefficient1.htm">
     * coefficient of determination</a>,
     * usually denoted r-square.
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return r-square
     */
    public double getRSquare() {
        return getRSquare(getSlope());
    }

    /**
     * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
     * standard error of the intercept estimate</a>,
     * usually denoted s(b0).
     * <p>
     * If there are fewer than <strong>three</strong> observations in the
     * model, or if there is no variation in x, this returns
     * <code>Double.NaN</code>.
     *
     * @return standard error associated with intercept estimate
     */
    public double getInterceptStdErr() {
        return Math.sqrt(
            getMeanSquareError() * ((1d / (double) n) + (xbar * xbar) / sumXX));
    }

    /**
     * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
     * error of the slope estimate</a>,
     * usually denoted s(b1).
     * <p>
     * If there are fewer than <strong>three</strong> data pairs in the model,
     * or if there is no variation in x, this returns <code>Double.NaN</code>.
     *
     * @return standard error associated with slope estimate
     */
    public double getSlopeStdErr() {
        return Math.sqrt(getMeanSquareError() / sumXX);
    }

    /**
     * Returns the half-width of a 95% confidence interval for the slope
     * estimate.
     * <p>
     * The 95% confidence interval is
     * <p>
     * <code>(getSlope() - getSlopeConfidenceInterval(),
     * getSlope() + getSlopeConfidenceInterval())</code>
     * <p>
     * If there are fewer than <strong>three</strong> observations in the
     * model, or if there is no variation in x, this returns
     * <code>Double.NaN</code>.
     * <p>
     * <strong>Usage Note</strong>:<br>
     * The validity of this statistic depends on the assumption that the
     * observations included in the model are drawn from a
     * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
     * Bivariate Normal Distribution</a>.
     *
     * @return half-width of 95% confidence interval for the slope estimate
     *
     * @throws MathException if the confidence interval can not be computed.
     */
    public double getSlopeConfidenceInterval() throws MathException {
        return getSlopeConfidenceInterval(0.05d);
    }

    /**
     * Returns the half-width of a (100-100*alpha)% confidence interval for
     * the slope estimate.
     * <p>
     * The (100-100*alpha)% confidence interval is
     * <p>
     * <code>(getSlope() - getSlopeConfidenceInterval(),
     * getSlope() + getSlopeConfidenceInterval())</code>
     * <p>
     * To request, for example, a 99% confidence interval, use
     * <code>alpha = .01</code>
     * <p>
     * <strong>Usage Note</strong>:<br>
     * The validity of this statistic depends on the assumption that the
     * observations included in the model are drawn from a
     * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
     * Bivariate Normal Distribution</a>.
     * <p>
     * <strong> Preconditions:</strong><ul>
     * <li>If there are fewer than <strong>three</strong> observations in the
     * model, or if there is no variation in x, this returns
     * <code>Double.NaN</code>.
     * </li>
     * <li><code>(0 < alpha < 1)</code>; otherwise an
     * <code>IllegalArgumentException</code> is thrown.
     * </li></ul>
     *
     * @param alpha the desired significance level
     * @return half-width of 95% confidence interval for the slope estimate
     * @throws MathException if the confidence interval can not be computed.
     */
    public double getSlopeConfidenceInterval(double alpha)
        throws MathException {
        if (alpha >= 1 || alpha <= 0) {
            throw new IllegalArgumentException(
                "alpha must be strictly between 0 and 1, got: " + alpha);
        }
        // Half-width = s(b1) * t(1 - alpha/2, n - 2)
        return getSlopeStdErr() *
            getTDistribution().inverseCumulativeProbability(1d - alpha / 2d);
    }

    /**
     * Returns the significance level of the slope (equiv) correlation.
     * <p>
     * Specifically, the returned value is the smallest <code>alpha</code>
     * such that the slope confidence interval with significance level
     * equal to <code>alpha</code> does not include <code>0</code>.
     * On regression output, this is often denoted <code>Prob(|t| > 0)</code>
     * <p>
     * <strong>Usage Note</strong>:<br>
     * The validity of this statistic depends on the assumption that the
     * observations included in the model are drawn from a
     * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
     * Bivariate Normal Distribution</a>.
     * <p>
     * If there are fewer than <strong>three</strong> observations in the
     * model, or if there is no variation in x, this returns
     * <code>Double.NaN</code>.
     *
     * @return significance level for slope/correlation
     * @throws MathException if the significance level can not be computed.
     */
    public double getSignificance() throws MathException {
        // NOTE(review): this computes a ONE-sided p-value, 1 - F(|t|),
        // while the javadoc above describes the two-sided quantity
        // 2 * (1 - F(|t|)) implied by the confidence-interval definition.
        // The accompanying unit test pins the one-sided value, so behavior
        // is preserved here -- confirm which semantics are intended.
        return (
            1.0 - getTDistribution().cumulativeProbability(
                Math.abs(getSlope()) / getSlopeStdErr()));
    }

    // ---------------------Private methods-----------------------------------

    /**
     * Returns the intercept of the estimated regression line, given the slope.
     * <p>
     * Will return <code>NaN</code> if slope is <code>NaN</code>.
     *
     * @param slope current slope
     * @return the intercept of the regression line
     */
    private double getIntercept(double slope) {
        return (sumY - slope * sumX) / ((double) n);
    }

    /**
     * Returns the sum of squared errors associated with the regression
     * model, using the slope of the regression line.
     * <p>
     * Returns NaN if the slope is NaN, as documented.  (Uses the supplied
     * slope -- SSE = sumYY - b1 * sumXY, algebraically equivalent to
     * sumYY - sumXY^2 / sumXX when b1 = sumXY / sumXX -- so that a
     * non-estimable slope always propagates NaN here; the previous form
     * ignored <code>b1</code> and could return a finite value even when
     * the slope was NaN.)
     *
     * @param b1 current slope
     * @return sum of squared errors associated with the regression model
     */
    private double getSumSquaredErrors(double b1) {
        return sumYY - b1 * sumXY;
    }

    /**
     * Computes r-square from the slope.
     * <p>
     * Will return NaN if slope is NaN.
     *
     * @param b1 current slope
     * @return r-square
     */
    private double getRSquare(double b1) {
        double ssto = getTotalSumSquares();
        return (ssto - getSumSquaredErrors(b1)) / ssto;
    }

    /**
     * Computes SSR from b1.
     *
     * @param slope regression slope estimate
     * @return sum of squared deviations of predicted y values
     */
    private double getRegressionSumSquares(double slope) {
        return slope * slope * sumXX;
    }

    /**
     * Uses distribution framework to get a t distribution instance
     * with df = n - 2
     *
     * @return t distribution with df = n - 2
     */
    private TDistribution getTDistribution() {
        return DistributionFactory.newInstance().createTDistribution(n - 2);
    }
}
1.1
jakarta-commons/math/src/test/org/apache/commons/math/stat/multivariate/SimpleRegressionTest.java
Index: SimpleRegressionTest.java
===================================================================
/*
* Copyright 2003-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.multivariate;
import java.util.Random;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * Test cases for the SimpleRegression class.
 *
 * @version $Revision: 1.1 $ $Date: 2004/09/01 15:19:32 $
 */
public final class SimpleRegressionTest extends TestCase {

    /*
     * NIST "Norris" reference data set from
     * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
     * Strangely, order is {y,x}
     */
    private double[][] data = {
        { 0.1, 0.2 }, { 338.8, 337.4 }, { 118.1, 118.2 }, { 888.0, 884.6 },
        { 9.2, 10.1 }, { 228.1, 226.5 }, { 668.5, 666.3 }, { 998.5, 996.3 },
        { 449.1, 448.6 }, { 778.9, 777.0 }, { 559.2, 558.2 }, { 0.3, 0.4 },
        { 0.1, 0.6 }, { 778.1, 775.5 }, { 668.8, 666.9 }, { 339.3, 338.0 },
        { 448.9, 447.5 }, { 10.8, 11.6 }, { 557.7, 556.0 }, { 228.3, 228.1 },
        { 998.0, 995.8 }, { 888.8, 887.6 }, { 119.6, 120.2 }, { 0.3, 0.3 },
        { 0.6, 0.3 }, { 557.6, 556.8 }, { 339.3, 339.1 }, { 888.0, 887.2 },
        { 998.5, 999.0 }, { 778.9, 779.0 }, { 10.2, 11.1 }, { 117.6, 118.3 },
        { 228.9, 229.2 }, { 668.4, 669.1 }, { 449.2, 448.9 }, { 0.2, 0.5 }
    };

    /*
     * Correlation example from
     * http://www.xycoon.com/correlation.htm
     */
    private double[][] corrData = {
        { 101.0, 99.2 }, { 100.1, 99.0 }, { 100.0, 100.0 }, { 90.6, 111.6 },
        { 86.5, 122.2 }, { 89.7, 117.6 }, { 90.6, 121.1 }, { 82.8, 136.0 },
        { 70.1, 154.2 }, { 65.4, 153.6 }, { 61.3, 158.5 }, { 62.5, 140.6 },
        { 63.6, 136.2 }, { 52.6, 168.0 }, { 59.7, 154.3 }, { 59.5, 149.0 },
        { 61.3, 165.5 }
    };

    /*
     * From Moore and McCabe, "Introduction to the Practice of Statistics"
     * Example 10.3
     */
    private double[][] infData = {
        { 15.6, 5.2 }, { 26.8, 6.1 }, { 37.8, 8.7 }, { 36.4, 8.5 },
        { 35.5, 8.8 }, { 18.6, 4.9 }, { 15.3, 4.5 }, { 7.9, 2.5 },
        { 0.0, 1.1 }
    };

    /*
     * From http://www.xycoon.com/simple_linear_regression.htm
     */
    private double[][] infData2 = {
        { 1, 3 }, { 2, 5 }, { 3, 7 }, { 4, 14 }, { 5, 11 }
    };

    public SimpleRegressionTest(String name) {
        super(name);
    }

    public static Test suite() {
        TestSuite suite = new TestSuite(SimpleRegressionTest.class);
        // Suite name updated to match the renamed class
        // (was the pre-rename "BivariateRegression Tests").
        suite.setName("SimpleRegression Tests");
        return suite;
    }

    /** Verifies all statistics against the NIST Norris certified values. */
    public void testNorris() {
        SimpleRegression regression = new SimpleRegression();
        for (int i = 0; i < data.length; i++) {
            // data rows are {y, x}, so swap when adding
            regression.addData(data[i][1], data[i][0]);
        }
        assertEquals("slope", 1.00211681802045, regression.getSlope(), 10E-12);
        assertEquals("slope std err", 0.429796848199937E-03,
            regression.getSlopeStdErr(), 10E-12);
        assertEquals("number of observations", 36, regression.getN());
        assertEquals("intercept", -0.262323073774029,
            regression.getIntercept(), 10E-12);
        assertEquals("std err intercept", 0.232818234301152,
            regression.getInterceptStdErr(), 10E-12);
        assertEquals("r-square", 0.999993745883712,
            regression.getRSquare(), 10E-12);
        assertEquals("SSR", 4255954.13232369,
            regression.getRegressionSumSquares(), 10E-9);
        assertEquals("MSE", 0.782864662630069,
            regression.getMeanSquareError(), 10E-10);
        assertEquals("SSE", 26.6173985294224,
            regression.getSumSquaredErrors(), 10E-9);
        assertEquals("predict(0)", -0.262323073774029,
            regression.predict(0), 10E-12);
        assertEquals("predict(1)", 1.00211681802045 - 0.262323073774029,
            regression.predict(1), 10E-12);
    }

    /** Verifies r and r-square against the xycoon correlation example. */
    public void testCorr() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(corrData);
        assertEquals("number of observations", 17, regression.getN());
        assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
        assertEquals("r", -.946638, regression.getR(), 10E-6);
    }

    /**
     * Verifies that statistics return NaN until enough data (and enough
     * x-variation) has been added to estimate each of them.
     */
    public void testNaNs() {
        SimpleRegression regression = new SimpleRegression();

        // No data at all: everything is NaN.
        assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
        assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
        assertTrue("slope std err not NaN",
            Double.isNaN(regression.getSlopeStdErr()));
        assertTrue("intercept std err not NaN",
            Double.isNaN(regression.getInterceptStdErr()));
        assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
        assertTrue("e not NaN", Double.isNaN(regression.getR()));
        assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
        assertTrue("RSS not NaN",
            Double.isNaN(regression.getRegressionSumSquares()));
        assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
        assertTrue("SSTO not NaN", Double.isNaN(regression.getTotalSumSquares()));
        assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));

        regression.addData(1, 2);
        regression.addData(1, 3);

        // No x variation, so these should still blow...
        assertTrue("intercept not NaN", Double.isNaN(regression.getIntercept()));
        assertTrue("slope not NaN", Double.isNaN(regression.getSlope()));
        assertTrue("slope std err not NaN",
            Double.isNaN(regression.getSlopeStdErr()));
        assertTrue("intercept std err not NaN",
            Double.isNaN(regression.getInterceptStdErr()));
        assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
        assertTrue("e not NaN", Double.isNaN(regression.getR()));
        assertTrue("r-square not NaN", Double.isNaN(regression.getRSquare()));
        assertTrue("RSS not NaN",
            Double.isNaN(regression.getRegressionSumSquares()));
        assertTrue("SSE not NaN", Double.isNaN(regression.getSumSquaredErrors()));
        assertTrue("predict not NaN", Double.isNaN(regression.predict(0)));

        // but SSTO should be OK
        assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));

        regression = new SimpleRegression();
        regression.addData(1, 2);
        regression.addData(3, 3);

        // All should be OK except MSE, s(b0), s(b1) which need one more df
        assertTrue("interceptNaN", !Double.isNaN(regression.getIntercept()));
        assertTrue("slope NaN", !Double.isNaN(regression.getSlope()));
        assertTrue("slope std err not NaN",
            Double.isNaN(regression.getSlopeStdErr()));
        assertTrue("intercept std err not NaN",
            Double.isNaN(regression.getInterceptStdErr()));
        assertTrue("MSE not NaN", Double.isNaN(regression.getMeanSquareError()));
        assertTrue("r NaN", !Double.isNaN(regression.getR()));
        assertTrue("r-square NaN", !Double.isNaN(regression.getRSquare()));
        assertTrue("RSS NaN", !Double.isNaN(regression.getRegressionSumSquares()));
        assertTrue("SSE NaN", !Double.isNaN(regression.getSumSquaredErrors()));
        assertTrue("SSTO NaN", !Double.isNaN(regression.getTotalSumSquares()));
        assertTrue("predict NaN", !Double.isNaN(regression.predict(0)));

        regression.addData(1, 4);

        // MSE, s(b0), s(b1) should all be OK now
        assertTrue("MSE NaN", !Double.isNaN(regression.getMeanSquareError()));
        assertTrue("slope std err NaN", !Double.isNaN(regression.getSlopeStdErr()));
        assertTrue("intercept std err NaN",
            !Double.isNaN(regression.getInterceptStdErr()));
    }

    /** Verifies that clear() empties the model and allows reuse. */
    public void testClear() {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(corrData);
        assertEquals("number of observations", 17, regression.getN());
        regression.clear();
        assertEquals("number of observations", 0, regression.getN());
        regression.addData(corrData);
        assertEquals("r-square", .896123, regression.getRSquare(), 10E-6);
        regression.addData(data);
        assertEquals("number of observations", 53, regression.getN());
    }

    /** Verifies confidence intervals and significance levels. */
    public void testInference() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        regression.addData(infData);
        assertEquals("slope confidence interval", 0.0271,
            regression.getSlopeConfidenceInterval(), 0.0001);
        assertEquals("slope std err", 0.01146,
            regression.getSlopeStdErr(), 0.0001);

        regression = new SimpleRegression();
        regression.addData(infData2);
        assertEquals("significance", 0.023331,
            regression.getSignificance(), 0.0001);

        //FIXME: get a real example to test against with alpha = .01
        assertTrue("tighter means wider",
            regression.getSlopeConfidenceInterval() <
            regression.getSlopeConfidenceInterval(0.01));
        try {
            regression.getSlopeConfidenceInterval(1);
            fail("expecting IllegalArgumentException for alpha = 1");
        } catch (IllegalArgumentException ex) {
            // expected
        }
    }

    /** A perfectly linear positive-slope fit has significance ~0. */
    public void testPerfect() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        int n = 100;
        for (int i = 0; i < n; i++) {
            regression.addData(((double) i) / (n - 1), i);
        }
        assertEquals(0.0, regression.getSignificance(), 1.0e-5);
        assertTrue(regression.getSlope() > 0.0);
    }

    /** A perfectly linear negative-slope fit has significance ~0. */
    public void testPerfectNegative() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        int n = 100;
        for (int i = 0; i < n; i++) {
            regression.addData(-((double) i) / (n - 1), i);
        }
        assertEquals(0.0, regression.getSignificance(), 1.0e-5);
        assertTrue(regression.getSlope() < 0.0);
    }

    /** Random (uncorrelated) y values give a non-degenerate significance. */
    public void testRandom() throws Exception {
        SimpleRegression regression = new SimpleRegression();
        Random random = new Random(1);
        int n = 100;
        for (int i = 0; i < n; i++) {
            regression.addData(((double) i) / (n - 1), random.nextDouble());
        }
        assertTrue(0.0 < regression.getSignificance()
            && regression.getSignificance() < 1.0);
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]