Author: psteitz Date: Mon Feb 16 05:09:49 2009 New Revision: 744802 URL: http://svn.apache.org/viewvc?rev=744802&view=rev Log: Added Pearsons correlation implemendation. JIRA: MATH-114
Added: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java (with props) commons/proper/math/trunk/src/test/R/correlationTestCases commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java (with props) Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml commons/proper/math/trunk/src/test/R/testAll Added: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java?rev=744802&view=auto ============================================================================== --- commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java (added) +++ commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java Mon Feb 16 05:09:49 2009 @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.correlation; + +import org.apache.commons.math.MathException; +import org.apache.commons.math.MathRuntimeException; +import org.apache.commons.math.distribution.TDistribution; +import org.apache.commons.math.distribution.TDistributionImpl; +import org.apache.commons.math.linear.RealMatrix; +import org.apache.commons.math.linear.DenseRealMatrix; +import org.apache.commons.math.stat.regression.SimpleRegression; + +/** + * Computes Pearson's product-moment correlation coefficients for pairs of arrays + * or columns of a matrix. + * + * <p>The constructors that take <code>RealMatrix</code> or + * <code>double[][]</code> arguments generate correlation matrices. The + * columns of the input matrices are assumed to represent variable values. + * Correlations are given by the formula</p> + * <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code> + * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code> + * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations. + * + * @version $Revision$ $Date$ + * @since 2.0 + */ +public class PearsonsCorrelation { + + /** correlation matrix */ + private final RealMatrix correlationMatrix; + + /** number of observations */ + private final int nObs; + + /** + * Create a PearsonsCorrelation instance without data + */ + public PearsonsCorrelation() { + super(); + correlationMatrix = null; + nObs = 0; + } + + /** + * Create a PearsonsCorrelation from a rectangular array + * whose columns represent values of variables to be correlated. + * + * @param data rectangular array with columns representing variables + * @throws IllegalArgumentException if the input data array is not + * rectangular with at least two rows and two columns. + */ + public PearsonsCorrelation(double[][] data) { + this(new DenseRealMatrix(data)); + } + + /** + * Create a PearsonsCorrelation from a RealMatrix whose columns + * represent variables to be correlated. + * + * @param matrix matrix with columns representing variables to correlate + */ + public PearsonsCorrelation(RealMatrix matrix) { + checkSufficientData(matrix); + nObs = matrix.getRowDimension(); + correlationMatrix = computeCorrelation(matrix); + } + + /** + * Create a PearsonsCorrelation from a {...@link Covariance}. The correlation + * matrix is computed by scaling the Covariance's covariance matrix. + * The Covariance instance must have been created from a data matrix with + * columns representing variable values. + * + * @param covariance Covariance instance + */ + public PearsonsCorrelation(Covariance covariance) { + RealMatrix covarianceMatrix = covariance.getCovarianceMatrix(); + if (covarianceMatrix == null) { + throw MathRuntimeException.createIllegalArgumentException( + "Covariance matrix is null", null); + } + nObs = covariance.getN(); + correlationMatrix = covarianceToCorrelation(covarianceMatrix); + } + + /** + * Create a PearsonsCorrelation from a covariance matrix. The correlation + * matrix is computed by scaling the covariance matrix. + * + * @param covarianceMatrix covariance matrix + * @param numberOfObservations the number of observations in the dataset used to compute + * the covariance matrix + */ + public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) { + nObs = numberOfObservations; + correlationMatrix = covarianceToCorrelation(covarianceMatrix); + + } + + /** + * Returns the correlation matrix + * + * @return correlation matrix + */ + public RealMatrix getCorrelationMatrix() { + return correlationMatrix; + } + + /** + * Returns a matrix of standard errors associated with the estimates + * in the correlation matrix.<br/> + * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard + * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code> + * <p>The formula used to compute the standard error is <br/> + * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code> + * where <code>r</code> is the estimated correlation coefficient and + * <code>n</code> is the number of observations in the source dataset.</p> + * + * @return matrix of correlation standard errors + */ + public RealMatrix getCorrelationStandardErrors() { + int nVars = correlationMatrix.getColumnDimension(); + double[][] out = new double[nVars][nVars]; + for (int i = 0; i < nVars; i++) { + for (int j = 0; j < nVars; j++) { + double r = correlationMatrix.getEntry(i, j); + out[i][j] = Math.sqrt((1 - r * r) /(nObs - 2)); + } + } + return new DenseRealMatrix(out); + } + + /** + * Returns a matrix of p-values associated with the (two-sided) null + * hypothesis that the corresponding correlation coefficient is zero. + * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability + * that a random variable distributed as <code>t<sub>n-2</sub></code> takes + * a value with absolute value greater than or equal to <br> + * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p> + * <p>The values in the matrix are sometimes referred to as the + * <i>significance</i> of the corresponding correlation coefficients.</p> + * + * @return matrix of p-values + * @throws MathException if an error occurs estimating probabilities + */ + public RealMatrix getCorrelationPValues() throws MathException { + TDistribution tDistribution = new TDistributionImpl(nObs - 2); + int nVars = correlationMatrix.getColumnDimension(); + double[][] out = new double[nVars][nVars]; + for (int i = 0; i < nVars; i++) { + for (int j = 0; j < nVars; j++) { + if (i == j) { + out[i][j] = 0d; + } else { + double r = correlationMatrix.getEntry(i, j); + double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r))); + out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t)); + } + } + } + return new DenseRealMatrix(out); + } + + + /** + * Computes the correlation matrix for the columns of the + * input matrix. + * + * @param matrix matrix with columns representing variables to correlate + * @return correlation matrix + */ + public RealMatrix computeCorrelation(RealMatrix matrix) { + int nVars = matrix.getColumnDimension(); + RealMatrix outMatrix = new DenseRealMatrix(nVars, nVars); + for (int i = 0; i < nVars; i++) { + for (int j = 0; j < i; j++) { + double corr = correlation(matrix.getColumn(i), matrix.getColumn(j)); + outMatrix.setEntry(i, j, corr); + outMatrix.setEntry(j, i, corr); + } + outMatrix.setEntry(i, i, 1d); + } + return outMatrix; + } + + /** + * Computes the Pearson's product-moment correlation coefficient between the two arrays. + * + * </p>Throws IllegalArgumentException if the arrays do not have the same length + * or their common length is less than 2</p> + * + * @param xArray first data array + * @param yArray second data array + * @return Returns Pearson's correlation coefficient for the two arrays + * @throws IllegalArgumentException if the arrays lengths do not match or + * there is insufficient data + */ + public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException { + SimpleRegression regression = new SimpleRegression(); + if(xArray.length == yArray.length && xArray.length > 1) { + for(int i=0; i<xArray.length; i++) { + regression.addData(xArray[i], yArray[i]); + } + return regression.getR(); + } + else { + throw MathRuntimeException.createIllegalArgumentException( + "Invalid array dimensions. xArray has size {0}; yArray has {1} elements", + new Object[] {xArray.length, yArray.length}); + } + } + + /** + * Derives a correlation matrix from a covariance matrix. + * + * <p>Uses the formula <br/> + * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where + * <code>r(·,·)</code> is the correlation coefficient and + * <code>s(·)</code> means standard deviation.</p> + * + * @param covarianceMatrix the covariance matrix + * @return correlation matrix + */ + public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) { + int nVars = covarianceMatrix.getColumnDimension(); + RealMatrix outMatrix = new DenseRealMatrix(nVars, nVars); + for (int i = 0; i < nVars; i++) { + double sigma = Math.sqrt(covarianceMatrix.getEntry(i, i)); + outMatrix.setEntry(i, i, 1d); + for (int j = 0; j < i; j++) { + double entry = covarianceMatrix.getEntry(i, j) / + (sigma * Math.sqrt(covarianceMatrix.getEntry(j, j))); + outMatrix.setEntry(i, j, entry); + outMatrix.setEntry(j, i, entry); + } + } + return outMatrix; + } + + /** + * Throws IllegalArgumentException of the matrix does not have at least + * two columns and two rows + * + * @param matrix matrix to check for sufficiency + */ + private void checkSufficientData(final RealMatrix matrix) { + int nRows = matrix.getRowDimension(); + int nCols = matrix.getColumnDimension(); + if (nRows < 2 || nCols < 2) { + throw MathRuntimeException.createIllegalArgumentException( + "Insufficient data: only {0} rows and {1} columns.", + new Object[]{nRows, nCols}); + } + } +} Propchange: commons/proper/math/trunk/src/java/org/apache/commons/math/stat/correlation/PearsonsCorrelation.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: commons/proper/math/trunk/src/site/xdoc/changes.xml URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/site/xdoc/changes.xml?rev=744802&r1=744801&r2=744802&view=diff ============================================================================== --- commons/proper/math/trunk/src/site/xdoc/changes.xml (original) +++ commons/proper/math/trunk/src/site/xdoc/changes.xml Mon Feb 16 05:09:49 2009 @@ -39,6 +39,10 @@ </properties> <body> <release version="2.0" date="TBD" description="TBD"> + <action dev="psteitz" type="add" issue="MATH-114" due-to="John Gant"> + Added PearsonsCorrelation class to compute correlation matrices, standard + errors and p-values for correlation coefficients. + </action> <action dev="psteitz" type="add" issue="MATH-114"> Added Covariance class to compute variance-covariance matrices in new correlation package. Added: commons/proper/math/trunk/src/test/R/correlationTestCases URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/correlationTestCases?rev=744802&view=auto ============================================================================== --- commons/proper/math/trunk/src/test/R/correlationTestCases (added) +++ commons/proper/math/trunk/src/test/R/correlationTestCases Mon Feb 16 05:09:49 2009 @@ -0,0 +1,185 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#------------------------------------------------------------------------------ +# R source file to validate Pearson's correlation tests in +# org.apache.commons.math.stat.correlation.PearsonsCorrelationTest +# +# To run the test, install R, put this file and testFunctions +# into the same directory, launch R from this directory and then enter +# source("<name-of-this-file>") +# +#------------------------------------------------------------------------------ +tol <- 1E-9 # error tolerance for tests +#------------------------------------------------------------------------------ +# Function definitions + +source("testFunctions") # utility test functions +options(digits=16) # override number of digits displayed + +# function to verify correlation computations +verifyCorrelation <- function(matrix, expectedCorrelation, name) { + correlation <- cor(matrix) + output <- c("Correlation matrix test dataset = ", name) + if (assertEquals(expectedCorrelation, correlation,tol,"Correlations")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } +} + +# function to verify p-values +verifyPValues <- function(matrix, pValues, name) { + dimension <- dim(matrix)[2] + corValues <- matrix(nrow=dimension,ncol=dimension) + expectedValues <- matrix(nrow=dimension,ncol=dimension) + for (i in 2:dimension) { + for (j in 1:(i-1)) { + corValues[i,j]<-cor.test(matrix[,i], matrix[,j])$p.value + corValues[j,i]<-corValues[i,j] + } + } + for (i in 1:dimension) { + corValues[i,i] <- 1 + expectedValues[i,i] <- 1 + } + ptr <- 1 + for (i in 2:dimension) { + for (j in 1:(i-1)) { + expectedValues[i,j] <- pValues[ptr] + expectedValues[j,i] <- expectedValues[i,j] + ptr <- ptr + 1 + } + } + output <- c("Correlation p-values test dataset = ", name) + if (assertEquals(expectedValues, corValues,tol,"p-values")) { + displayPadded(output, SUCCEEDED, WIDTH) + } else { + displayPadded(output, FAILED, WIDTH) + } + } + +#-------------------------------------------------------------------------- +cat("Correlation test cases\n") + +# Longley + +longley <- matrix(c(60323,83.0,234289,2356,1590,107608,1947, + 61122,88.5,259426,2325,1456,108632,1948, + 60171,88.2,258054,3682,1616,109773,1949, + 61187,89.5,284599,3351,1650,110929,1950, + 63221,96.2,328975,2099,3099,112075,1951, + 63639,98.1,346999,1932,3594,113270,1952, + 64989,99.0,365385,1870,3547,115094,1953, + 63761,100.0,363112,3578,3350,116219,1954, + 66019,101.2,397469,2904,3048,117388,1955, + 67857,104.6,419180,2822,2857,118734,1956, + 68169,108.4,442769,2936,2798,120445,1957, + 66513,110.8,444546,4681,2637,121950,1958, + 68655,112.6,482704,3813,2552,123366,1959, + 69564,114.2,502601,3931,2514,125368,1960, + 69331,115.7,518173,4806,2572,127852,1961, + 70551,116.9,554894,4007,2827,130081,1962), + nrow = 16, ncol = 7, byrow = TRUE) + +expectedCorrelation <- matrix(c( + 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942, + 0.4573073999764817, 0.960390571594376, 0.9713294591921188, + 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966, + 0.4647441876006747, 0.979163432977498, 0.9911491900672053, + 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580, + 0.4464367918926265, 0.991090069458478, 0.9952734837647849, + 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000, + -0.1774206295018783, 0.686551516365312, 0.6682566045621746, + 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783, + 1.0000000000000000, 0.364416267189032, 0.4172451498349454, + 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120, + 0.3644162671890320, 1.000000000000000, 0.9939528462329257, + 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746, + 0.4172451498349454, 0.993952846232926, 1.0000000000000000), + nrow = 7, ncol = 7, byrow = TRUE) + verifyCorrelation(longley, expectedCorrelation, "longley") + + expectedPValues <- c( + 4.38904690369668e-10, + 8.36353208910623e-12, 7.8159700933611e-14, + 0.0472894097790304, 0.01030636128354301, 0.01316878049026582, + 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452, + 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684, + 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15) + verifyPValues(longley, expectedPValues, "longley") + + # Swiss Fertility + + fertility <- matrix(c(80.2,17.0,15,12,9.96, + 83.1,45.1,6,9,84.84, + 92.5,39.7,5,5,93.40, + 85.8,36.5,12,7,33.77, + 76.9,43.5,17,15,5.16, + 76.1,35.3,9,7,90.57, + 83.8,70.2,16,7,92.85, + 92.4,67.8,14,8,97.16, + 82.4,53.3,12,7,97.67, + 82.9,45.2,16,13,91.38, + 87.1,64.5,14,6,98.61, + 64.1,62.0,21,12,8.52, + 66.9,67.5,14,7,2.27, + 68.9,60.7,19,12,4.43, + 61.7,69.3,22,5,2.82, + 68.3,72.6,18,2,24.20, + 71.7,34.0,17,8,3.30, + 55.7,19.4,26,28,12.11, + 54.3,15.2,31,20,2.15, + 65.1,73.0,19,9,2.84, + 65.5,59.8,22,10,5.23, + 65.0,55.1,14,3,4.52, + 56.6,50.9,22,12,15.14, + 57.4,54.1,20,6,4.20, + 72.5,71.2,12,1,2.40, + 74.2,58.1,14,8,5.23, + 72.0,63.5,6,3,2.56, + 60.5,60.8,16,10,7.72, + 58.3,26.8,25,19,18.46, + 65.4,49.5,15,8,6.10, + 75.5,85.9,3,2,99.71, + 69.3,84.9,7,6,99.68, + 77.3,89.7,5,2,100.00, + 70.5,78.2,12,6,98.96, + 79.4,64.9,7,3,98.22, + 65.0,75.9,9,9,99.06, + 92.2,84.6,3,3,99.46, + 79.3,63.1,13,13,96.83, + 70.4,38.4,26,12,5.62, + 65.7,7.7,29,11,13.79, + 72.7,16.7,22,13,11.22, + 64.4,17.6,35,32,16.92, + 77.6,37.6,15,7,4.97, + 67.6,18.7,25,7,8.65, + 35.0,1.2,37,53,42.34, + 44.7,46.6,16,29,50.43, + 42.8,27.7,22,29,58.33), + nrow = 47, ncol = 5, byrow = TRUE) + + expectedCorrelation <- matrix(c( + 1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691, 0.4636847006517939, + 0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398, + -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666, + -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148, + 0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000), + nrow = 5, ncol = 5, byrow = TRUE) + +verifyCorrelation(fertility, expectedCorrelation, "swiss fertility") + +displayDashes(WIDTH) \ No newline at end of file Modified: commons/proper/math/trunk/src/test/R/testAll URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/R/testAll?rev=744802&r1=744801&r2=744802&view=diff ============================================================================== --- commons/proper/math/trunk/src/test/R/testAll (original) +++ commons/proper/math/trunk/src/test/R/testAll Mon Feb 16 05:09:49 2009 @@ -50,6 +50,9 @@ # covariance source("covarianceTestCases") +# correlation +source("correlationTestCases") + #------------------------------------------------------------------------------ # if output has been diverted, change it back if (sink.number()) { Added: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java URL: http://svn.apache.org/viewvc/commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java?rev=744802&view=auto ============================================================================== --- commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java (added) +++ commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java Mon Feb 16 05:09:49 2009 @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.commons.math.stat.correlation; + +import org.apache.commons.math.TestUtils; +import org.apache.commons.math.distribution.TDistribution; +import org.apache.commons.math.distribution.TDistributionImpl; +import org.apache.commons.math.linear.RealMatrix; +import org.apache.commons.math.linear.DenseRealMatrix; + +import junit.framework.TestCase; + +public class PearsonsCorrelationTest extends TestCase { + + protected final double[] longleyData = new double[] { + 60323,83.0,234289,2356,1590,107608,1947, + 61122,88.5,259426,2325,1456,108632,1948, + 60171,88.2,258054,3682,1616,109773,1949, + 61187,89.5,284599,3351,1650,110929,1950, + 63221,96.2,328975,2099,3099,112075,1951, + 63639,98.1,346999,1932,3594,113270,1952, + 64989,99.0,365385,1870,3547,115094,1953, + 63761,100.0,363112,3578,3350,116219,1954, + 66019,101.2,397469,2904,3048,117388,1955, + 67857,104.6,419180,2822,2857,118734,1956, + 68169,108.4,442769,2936,2798,120445,1957, + 66513,110.8,444546,4681,2637,121950,1958, + 68655,112.6,482704,3813,2552,123366,1959, + 69564,114.2,502601,3931,2514,125368,1960, + 69331,115.7,518173,4806,2572,127852,1961, + 70551,116.9,554894,4007,2827,130081,1962 + }; + + protected final double[] swissData = new double[] { + 80.2,17.0,15,12,9.96, + 83.1,45.1,6,9,84.84, + 92.5,39.7,5,5,93.40, + 85.8,36.5,12,7,33.77, + 76.9,43.5,17,15,5.16, + 76.1,35.3,9,7,90.57, + 83.8,70.2,16,7,92.85, + 92.4,67.8,14,8,97.16, + 82.4,53.3,12,7,97.67, + 82.9,45.2,16,13,91.38, + 87.1,64.5,14,6,98.61, + 64.1,62.0,21,12,8.52, + 66.9,67.5,14,7,2.27, + 68.9,60.7,19,12,4.43, + 61.7,69.3,22,5,2.82, + 68.3,72.6,18,2,24.20, + 71.7,34.0,17,8,3.30, + 55.7,19.4,26,28,12.11, + 54.3,15.2,31,20,2.15, + 65.1,73.0,19,9,2.84, + 65.5,59.8,22,10,5.23, + 65.0,55.1,14,3,4.52, + 56.6,50.9,22,12,15.14, + 57.4,54.1,20,6,4.20, + 72.5,71.2,12,1,2.40, + 74.2,58.1,14,8,5.23, + 72.0,63.5,6,3,2.56, + 60.5,60.8,16,10,7.72, + 58.3,26.8,25,19,18.46, + 65.4,49.5,15,8,6.10, + 75.5,85.9,3,2,99.71, + 69.3,84.9,7,6,99.68, + 77.3,89.7,5,2,100.00, + 70.5,78.2,12,6,98.96, + 79.4,64.9,7,3,98.22, + 65.0,75.9,9,9,99.06, + 92.2,84.6,3,3,99.46, + 79.3,63.1,13,13,96.83, + 70.4,38.4,26,12,5.62, + 65.7,7.7,29,11,13.79, + 72.7,16.7,22,13,11.22, + 64.4,17.6,35,32,16.92, + 77.6,37.6,15,7,4.97, + 67.6,18.7,25,7,8.65, + 35.0,1.2,37,53,42.34, + 44.7,46.6,16,29,50.43, + 42.8,27.7,22,29,58.33 + }; + + + /** + * Test Longley dataset against R. + */ + public void testLongly() throws Exception { + RealMatrix matrix = createRealMatrix(longleyData, 16, 7); + PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); + RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix(); + double[] rData = new double[] { + 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942, + 0.4573073999764817, 0.960390571594376, 0.9713294591921188, + 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966, + 0.4647441876006747, 0.979163432977498, 0.9911491900672053, + 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580, + 0.4464367918926265, 0.991090069458478, 0.9952734837647849, + 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000, + -0.1774206295018783, 0.686551516365312, 0.6682566045621746, + 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783, + 1.0000000000000000, 0.364416267189032, 0.4172451498349454, + 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120, + 0.3644162671890320, 1.000000000000000, 0.9939528462329257, + 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746, + 0.4172451498349454, 0.993952846232926, 1.0000000000000000 + }; + TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15); + + double[] rPvalues = new double[] { + 4.38904690369668e-10, + 8.36353208910623e-12, 7.8159700933611e-14, + 0.0472894097790304, 0.01030636128354301, 0.01316878049026582, + 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452, + 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684, + 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15 + }; + RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7); + fillUpper(rPMatrix, 0d); + TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15); + } + + /** + * Test R Swiss fertility dataset against R. + */ + public void testSwissFertility() throws Exception { + RealMatrix matrix = createRealMatrix(swissData, 47, 5); + PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); + RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix(); + double[] rData = new double[] { + 1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691, 0.4636847006517939, + 0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398, + -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666, + -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148, + 0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000 + }; + TestUtils.assertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15); + + double[] rPvalues = new double[] { + 0.01491720061472623, + 9.45043734069043e-07, 9.95151527133974e-08, + 3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08, + 0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683 + }; + RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5); + fillUpper(rPMatrix, 0d); + TestUtils.assertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15); + } + + /** + * Constant column + */ + public void testConstant() { + double[] noVariance = new double[] {1, 1, 1, 1}; + double[] values = new double[] {1, 2, 3, 4}; + assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values))); + assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values))); + } + + + /** + * Insufficient data + */ + + public void testInsufficientData() { + double[] one = new double[] {1}; + double[] two = new double[] {2}; + try { + new PearsonsCorrelation().correlation(one, two); + fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // Expected + } + RealMatrix matrix = new DenseRealMatrix(new double[][] {{0},{1}}); + try { + new PearsonsCorrelation(matrix); + fail("Expecting IllegalArgumentException"); + } catch (IllegalArgumentException ex) { + // Expected + } + } + + /** + * Verify that direct t-tests using standard error estimates are consistent + * with reported p-values + */ + public void testStdErrorConsistency() throws Exception { + TDistribution tDistribution = new TDistributionImpl(45); + RealMatrix matrix = createRealMatrix(swissData, 47, 5); + PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); + RealMatrix rValues = corrInstance.getCorrelationMatrix(); + RealMatrix pValues = corrInstance.getCorrelationPValues(); + RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors(); + for (int i = 0; i < 5; i++) { + for (int j = 0; j < i; j++) { + double t = Math.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j); + double p = 2 * (1 - tDistribution.cumulativeProbability(t)); + assertEquals(p, pValues.getEntry(i, j), 10E-15); + } + } + } + + /** + * Verify that creating correlation from covariance gives same results as + * direct computation from the original matrix + */ + public void testCovarianceConsistency() throws Exception { + RealMatrix matrix = createRealMatrix(longleyData, 16, 7); + PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix); + Covariance covInstance = new Covariance(matrix); + PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance); + TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(), + corrFromCovInstance.getCorrelationMatrix(), 10E-15); + TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(), + corrFromCovInstance.getCorrelationPValues(), 10E-15); + TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(), + corrFromCovInstance.getCorrelationStandardErrors(), 10E-15); + + PearsonsCorrelation corrFromCovInstance2 = + new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16); + TestUtils.assertEquals("correlation values", corrInstance.getCorrelationMatrix(), + corrFromCovInstance2.getCorrelationMatrix(), 10E-15); + TestUtils.assertEquals("p values", corrInstance.getCorrelationPValues(), + corrFromCovInstance2.getCorrelationPValues(), 10E-15); + TestUtils.assertEquals("standard errors", corrInstance.getCorrelationStandardErrors(), + corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15); + } + + protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) { + double[][] matrixData = new double[nRows][nCols]; + int ptr = 0; + for (int i = 0; i < nRows; i++) { + System.arraycopy(data, ptr, matrixData[i], 0, nCols); + ptr += nCols; + } + return new DenseRealMatrix(matrixData); + } + + protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) { + int ptr = 0; + RealMatrix result = new DenseRealMatrix(dimension, dimension); + for (int i = 1; i < dimension; i++) { + for (int j = 0; j < i; j++) { + result.setEntry(i, j, data[ptr]); + ptr++; + } + } + return result; + } + + protected void fillUpper(RealMatrix matrix, double diagonalValue) { + int dimension = matrix.getColumnDimension(); + for (int i = 0; i < dimension; i++) { + matrix.setEntry(i, i, diagonalValue); + for (int j = i+1; j < dimension; j++) { + matrix.setEntry(i, j, matrix.getEntry(j, i)); + } + } + } +} Propchange: commons/proper/math/trunk/src/test/org/apache/commons/math/stat/correlation/PearsonsCorrelationTest.java ------------------------------------------------------------------------------ svn:eol-style = native