This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
The following commit(s) were added to refs/heads/master by this push:
new 8564fc9 STATISTICS-80: Variance Implementation (#52)
8564fc9 is described below
commit 8564fc95e122fdf69953603583af0fa559ebf68e
Author: Anirudh <[email protected]>
AuthorDate: Thu Sep 7 23:54:26 2023 +0530
STATISTICS-80: Variance Implementation (#52)
* Refactor common test cases to a new class TestData.
* Consolidate test cases for non-finites.
---------
Co-authored-by: Alex Herbert <[email protected]>
---
.../statistics/descriptive/FirstMoment.java | 34 ++-
.../commons/statistics/descriptive/Mean.java | 9 +-
.../descriptive/SumOfSquaredDeviations.java | 111 ++++++++++
.../commons/statistics/descriptive/Variance.java | 235 ++++++++++++++++++++
.../commons/statistics/descriptive/MeanTest.java | 203 +++++-------------
.../commons/statistics/descriptive/TestData.java | 149 +++++++++++++
.../commons/statistics/descriptive/TestHelper.java | 19 +-
.../statistics/descriptive/VarianceTest.java | 238 +++++++++++++++++++++
8 files changed, 818 insertions(+), 180 deletions(-)
diff --git
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
index 7194b0b..94745be 100644
---
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
+++
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/FirstMoment.java
@@ -16,6 +16,8 @@
*/
package org.apache.commons.statistics.descriptive;
+import java.util.function.DoubleConsumer;
+
/**
* Computes the first moment (arithmetic mean) using the definitional formula:
*
@@ -35,8 +37,8 @@ package org.apache.commons.statistics.descriptive;
*
* <p><strong>Note that this implementation is not synchronized.</strong> If
* multiple threads access an instance of this class concurrently, and at least
- * one of the threads invokes the <code>increment()</code> or
- * <code>clear()</code> method, it must be synchronized externally.
+ * one of the threads invokes the <code>accept()</code> or
+ * <code>combine()</code> method, it must be synchronized externally.
*
* <p>However, it is safe to use <code>accept()</code> and
<code>combine()</code>
* as <code>accumulator</code> and <code>combiner</code> functions of
@@ -45,25 +47,25 @@ package org.apache.commons.statistics.descriptive;
* provides the necessary partitioning, isolation, and merging of results for
* safe and efficient parallel execution.
*/
-class FirstMoment implements DoubleStatistic,
DoubleStatisticAccumulator<FirstMoment> {
+class FirstMoment implements DoubleConsumer {
/** Count of values that have been added. */
- private long n;
+ protected long n;
/** First moment of values that have been added. */
- private double m1;
+ protected double m1;
/**
* Deviation of most recently added value from the previous first moment.
* Retained to prevent repeated computation in higher order moments.
*/
- private double dev;
+ protected double dev;
/**
* Deviation of most recently added value from the previous first moment,
* normalized by current sample size. Retained to prevent repeated
* computation in higher order moments.
*/
- private double nDev;
+ protected double nDev;
/**
* Running sum of values seen so far.
@@ -118,8 +120,7 @@ class FirstMoment implements DoubleStatistic,
DoubleStatisticAccumulator<FirstMo
* <p> {@code Infinity}, if infinities of the same sign have been
encountered.
* <p> {@code NaN} otherwise.
*/
- @Override
- public double getAsDouble() {
+ public double getFirstMoment() {
if (Double.isFinite(m1)) {
return n == 0 ? Double.NaN : m1;
}
@@ -127,8 +128,12 @@ class FirstMoment implements DoubleStatistic,
DoubleStatisticAccumulator<FirstMo
return nonFiniteValue;
}
- /** {@inheritDoc} */
- @Override
+ /**
+ * Combines the state of another {@code FirstMoment} into this one.
+ *
+ * @param other Another {@code FirstMoment} to be combined.
+ * @return {@code this} instance after combining {@code other}.
+ */
public FirstMoment combine(FirstMoment other) {
if (n == 0) {
n = other.n;
@@ -153,13 +158,6 @@ class FirstMoment implements DoubleStatistic,
DoubleStatisticAccumulator<FirstMo
return this;
}
- /**
- * @return Number of values seen so far.
- */
- long getN() {
- return n;
- }
-
/**
* Gets the running sum of the values seen so far.
* @return Running Sum.
diff --git
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
index ad6bdf4..5f17c6e 100644
---
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
+++
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
@@ -42,8 +42,8 @@ package org.apache.commons.statistics.descriptive;
*
* <p><strong>Note that this implementation is not synchronized.</strong> If
* multiple threads access an instance of this class concurrently, and at least
- * one of the threads invokes the <code>increment()</code> or
- * <code>clear()</code> method, it must be synchronized externally.
+ * one of the threads invokes the <code>accept()</code> or
+ * <code>combine()</code> method, it must be synchronized externally.
*
* <p>However, it is safe to use <code>accept()</code> and
<code>combine()</code>
* as <code>accumulator</code> and <code>combiner</code> functions of
@@ -148,7 +148,6 @@ public abstract class Mean implements DoubleStatistic,
DoubleStatisticAccumulato
firstMoment = new FirstMoment();
}
-
/**
* Creates a StorelessMean instance with an External Moment.
*
@@ -168,7 +167,7 @@ public abstract class Mean implements DoubleStatistic,
DoubleStatisticAccumulato
@Override
public double getAsDouble() {
- return firstMoment.getAsDouble();
+ return firstMoment.getFirstMoment();
}
@Override
@@ -183,7 +182,7 @@ public abstract class Mean implements DoubleStatistic,
DoubleStatisticAccumulato
* @return Number of values.
*/
long getN() {
- return firstMoment.getN();
+ return firstMoment.n;
}
/**
diff --git
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java
new file mode 100644
index 0000000..9c24d29
--- /dev/null
+++
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/SumOfSquaredDeviations.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+/**
+ * Computes the sum of squared deviations from the sample mean. This
+ * statistic is related to the second moment.
+ *
+ * <p>
+ * The following recursive updating formula is used:
+ * <p>
+ * Let <ul>
+ * <li> dev = (current obs - previous mean) </li>
+ * <li> n = number of observations (including current obs) </li>
+ * </ul>
+ * Then
+ * <p>
+ * new value = old value + dev^2 * (n - 1) / n.
+ * <p>
+ *
+ * Returns the sum of squared deviations of all values seen so far.
+ *
+ * <p><strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the {@link
java.util.function.DoubleConsumer#accept(double) accept} or
+ * {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} method,
it must be synchronized externally.
+ *
+ * <p>However, it is safe to use {@link
java.util.function.DoubleConsumer#accept(double) accept} and
+ * {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} as
<code>accumulator</code> and
+ * <code>combiner</code> functions of {@link java.util.stream.Collector
Collector} on a parallel stream, because the
+ * parallel implementation of {@link java.util.stream.Stream#collect
Stream.collect()} provides the necessary
+ * partitioning, isolation, and merging of results for safe and efficient
parallel execution.
+ */
+class SumOfSquaredDeviations extends FirstMoment {
+ /** Sum of squared deviations of the values that have been added. */
+ private double squaredDevSum;
+
+ /**
+ * Create a SumOfSquaredDeviations instance.
+ */
+ SumOfSquaredDeviations() {
+ // No-op
+ }
+
+ /**
+ * Create a SumOfSquaredDeviations instance with the given sum of
+ * squared deviations and the state of the first moment.
+ *
+ * @param squaredDevSum Sum of squared deviations.
+ * @param mean Mean of values.
+ * @param n Number of values.
+ * @param nonFiniteValue Sum of values.
+ */
+ SumOfSquaredDeviations(double squaredDevSum, double mean, long n, double
nonFiniteValue) {
+ super(mean, n, nonFiniteValue);
+ this.squaredDevSum = squaredDevSum;
+ }
+
+ /**
+ * Updates the state of the statistic to reflect the addition of {@code
value}.
+ * @param value Value.
+ */
+ @Override
+ public void accept(double value) {
+ super.accept(value);
+ squaredDevSum += (n - 1) * dev * nDev;
+ }
+
+ /**
+ * Gets the sum of squared deviations of all input values.
+ *
+ * @return {@code SumOfSquaredDeviations} of all values seen so far.
+ */
+ public double getSumOfSquaredDeviations() {
+ return Double.isFinite(getFirstMoment()) ? squaredDevSum : Double.NaN;
+ }
+
+ /**
+ * Combines the state of another {@code SumOfSquaredDeviations} into this
one.
+ *
+ * @param other Another {@code SumOfSquaredDeviations} to be combined.
+ * @return {@code this} instance after combining {@code other}.
+ */
+ public SumOfSquaredDeviations combine(SumOfSquaredDeviations other) {
+ final long oldN = n;
+ final long otherN = other.n;
+ if (oldN == 0) {
+ squaredDevSum = other.squaredDevSum;
+ } else if (otherN != 0) {
+ final double diffOfMean = other.getFirstMoment() - m1;
+ final double sqDiffOfMean = diffOfMean * diffOfMean;
+ squaredDevSum += other.squaredDevSum + sqDiffOfMean * (((double)
oldN * otherN) / ((double) oldN + otherN));
+ }
+ super.combine(other);
+ return this;
+ }
+}
diff --git
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
new file mode 100644
index 0000000..d03d170
--- /dev/null
+++
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+/**
+ * Computes the variance of a set of values. By default, the
+ * "sample variance" is computed. The definitional formula for sample
+ * variance is:
+ * <p>
+ * sum((x_i - mean)^2) / (n - 1)
+ * <p>This formula does not have good numerical properties, so this
+ * implementation does not use it to compute the statistic.
+ * <ul>
+ * <li> The {@link #accept(double)} method computes the variance using
+ * updating formulae based on West's algorithm, as described in
+ * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
+ * J. G. Lewis 1979, <i>Communications of the ACM</i>,
+ * vol. 22 no. 9, pp. 526-531.</a></li>
+ *
+ * <li> The {@link #of(double...)} method leverages the fact that it has the
+ * full array of values in memory to execute a two-pass algorithm.
+ * Specifically, this method uses the "corrected two-pass algorithm" from
+ * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
+ * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul>
+ *
+ * Note that adding values using {@code accept} and then executing {@code
getAsDouble} will
+ * sometimes give a different, less accurate, result than executing
+ * {@code of} with the full array of values. The former approach
+ * should only be used when the full array of values is not available.
+ *
+ * <p>
+ * Returns <code>Double.NaN</code> if no data values have been added and
+ * returns <code>0</code> if there is just one finite value in the data set.
+ * Note that <code>Double.NaN</code> may also be returned if the input includes
+ * <code>Double.NaN</code> and / or infinite values.
+ *
+ * <p>This class is designed to work with (though does not require)
+ * {@linkplain java.util.stream streams}.
+ *
+ * <p><strong>Note that this implementation is not synchronized.</strong> If
+ * multiple threads access an instance of this class concurrently, and at least
+ * one of the threads invokes the {@link
java.util.function.DoubleConsumer#accept(double) accept} or
+ * {@link DoubleStatisticAccumulator#combine(DoubleStatistic) combine} method,
it must be synchronized externally.
+ *
+ * <p>However, it is safe to use <code>accept()</code> and
<code>combine()</code>
+ * as <code>accumulator</code> and <code>combiner</code> functions of
+ * {@link java.util.stream.Collector Collector} on a parallel stream,
+ * because the parallel implementation of {@link
java.util.stream.Stream#collect Stream.collect()}
+ * provides the necessary partitioning, isolation, and merging of results for
+ * safe and efficient parallel execution.
+ *
+ * @since 1.1
+ */
+public abstract class Variance implements DoubleStatistic,
DoubleStatisticAccumulator<Variance> {
+
+ /**
+ * Create a Variance instance.
+ */
+ Variance() {
+ // No-op
+ }
+
+ /**
+ * Creates a {@code Variance} implementation which does not store the
input value(s) it consumes.
+ *
+ * <p>The result is <code>NaN</code> if:
+ * <ul>
+ * <li>no values have been added,</li>
+ * <li>any of the values is <code>NaN</code>, or</li>
+ * <li>an infinite value of either sign is encountered</li>
+ * </ul>
+ *
+ * @return {@code Variance} implementation.
+ */
+ public static Variance create() {
+ return new StorelessSampleVariance();
+ }
+
+ /**
+ * Returns a {@code Variance} instance that has the variance of all input
values, or <code>NaN</code>
+ * if:
+ * <ul>
+ * <li>the input array is empty,</li>
+ * <li>any of the values is <code>NaN</code>,</li>
+ * <li>an infinite value of either sign is encountered, or</li>
+ * <li>the sum of the squared deviations from the mean is infinite</li>
+ * </ul>
+ *
+ * <p>Note: {@code Variance} computed using {@link Variance#accept
Variance.accept()} may be different
+ * from this variance.
+ *
+ * <p>See {@link Variance} for details on the computing algorithm.
+ *
+ * @param values Values.
+ * @return {@code Variance} instance.
+ */
+ public static Variance of(double... values) {
+ final double mean = Mean.of(values).getAsDouble();
+ if (!Double.isFinite(mean)) {
+ return StorelessSampleVariance.create(Math.abs(mean), mean,
values.length, mean);
+ }
+ double accum = 0.0;
+ double dev;
+ double accum2 = 0.0;
+ double squaredDevSum;
+ for (final double value : values) {
+ dev = value - mean;
+ accum += dev * dev;
+ accum2 += dev;
+ }
+ final double accum2Squared = accum2 * accum2;
+ final long n = values.length;
+ // The sum of squared deviations is accum - (accum2Squared / n).
+ // To prevent squaredDevSum from spuriously attaining a NaN value
+ // when accum is infinite, assign it an infinite value which is its
intended value.
+ if (accum == Double.POSITIVE_INFINITY) {
+ squaredDevSum = Double.POSITIVE_INFINITY;
+ } else {
+ squaredDevSum = accum - (accum2Squared / n);
+ }
+ return StorelessSampleVariance.create(squaredDevSum, mean, n, accum2 +
(mean * n));
+ }
+
+ /**
+ * Updates the state of the statistic to reflect the addition of {@code
value}.
+ * @param value Value.
+ */
+ @Override
+ public abstract void accept(double value);
+
+ /**
+ * Gets the variance of all input values.
+ *
+ * <p>The result is <code>NaN</code> if:
+ * <ul>
+ * <li>no values have been added,</li>
+ * <li>any of the values is <code>NaN</code>, or</li>
+ * <li>an infinite value of either sign is encountered</li>
+ * </ul>
+ *
+ * <p>The result is <code>0</code> if there is just one finite value in
the data set.
+ *
+ * @return {@code Variance} of all values seen so far.
+ */
+ @Override
+ public abstract double getAsDouble();
+
+ /** {@inheritDoc} */
+ @Override
+ public abstract Variance combine(Variance other);
+
+ /**
+ * {@code Variance} implementation that does not store the input value(s)
processed so far.
+ */
+ private static class StorelessSampleVariance extends Variance {
+
+ /**
+ * An instance of {@link SumOfSquaredDeviations}, which is used to
+ * compute the variance.
+ */
+ private final SumOfSquaredDeviations squaredDeviationSum;
+
+ /**
+ * Creates a StorelessSampleVariance instance with the sum of squared
+ * deviations from the mean.
+ *
+ * @param squaredDevSum Sum of squared deviations.
+ * @param mean Mean of values.
+ * @param n Number of values.
+ * @param sumOfValues Sum of values.
+ */
+ private StorelessSampleVariance(double squaredDevSum, double mean,
long n, double sumOfValues) {
+ squaredDeviationSum = new SumOfSquaredDeviations(squaredDevSum,
mean, n, sumOfValues);
+ }
+
+ /**
+ * Create a StorelessSampleVariance instance.
+ */
+ StorelessSampleVariance() {
+ squaredDeviationSum = new SumOfSquaredDeviations();
+ }
+
+ /**
+ * Creates a StorelessSampleVariance instance with the sum of squared
+ * deviations from the mean.
+ *
+ * @param squaredDevSum Sum of squared deviations.
+ * @param mean Mean of values.
+ * @param n Number of values.
+ * @param sumOfValues Sum of values.
+ * @return A StorelessSampleVariance instance.
+ */
+ static StorelessSampleVariance create(double squaredDevSum, double
mean, long n, double sumOfValues) {
+ return new StorelessSampleVariance(squaredDevSum, mean, n,
sumOfValues);
+ }
+
+ @Override
+ public void accept(double value) {
+ squaredDeviationSum.accept(value);
+ }
+
+ @Override
+ public double getAsDouble() {
+ final double sumOfSquaredDev =
squaredDeviationSum.getSumOfSquaredDeviations();
+ final double n = squaredDeviationSum.n;
+ if (n == 0) {
+ return Double.NaN;
+ } else if (n == 1 && Double.isFinite(sumOfSquaredDev)) {
+ return 0;
+ }
+ return sumOfSquaredDev / (n - 1);
+ }
+
+ @Override
+ public Variance combine(Variance other) {
+ final StorelessSampleVariance that = (StorelessSampleVariance)
other;
+ squaredDeviationSum.combine(that.squaredDeviationSum);
+ return this;
+ }
+ }
+}
diff --git
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MeanTest.java
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MeanTest.java
index 28064de..0754919 100644
---
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MeanTest.java
+++
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/MeanTest.java
@@ -16,15 +16,11 @@
*/
package org.apache.commons.statistics.descriptive;
-import java.math.BigDecimal;
-import java.math.MathContext;
import java.util.Arrays;
-import java.util.stream.Stream;
import org.apache.commons.rng.UniformRandomProvider;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
-import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
/**
@@ -54,9 +50,9 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testMean")
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValues")
void testMean(double[] values) {
- double expected = computeExpected(values);
+ final double expected = computeExpected(values);
Mean mean = Mean.create();
for (double value : values) {
mean.accept(value);
@@ -66,10 +62,10 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testMean")
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValues")
void testParallelStream(double[] values) {
- double expected = computeExpected(values);
- double ans = Arrays.stream(values)
+ final double expected = computeExpected(values);
+ final double ans = Arrays.stream(values)
.parallel()
.collect(Mean::create, Mean::accept, Mean::combine)
.getAsDouble();
@@ -77,7 +73,7 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testMean")
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValues")
void testMeanRandomOrder(double[] values) {
UniformRandomProvider rng = TestHelper.createRNG();
for (int i = 1; i <= 10; i++) {
@@ -86,44 +82,12 @@ final class MeanTest {
}
}
- static Stream<Arguments> testMean() {
- return Stream.of(
- Arguments.of(new double[] {0.0}),
- Arguments.of(new double[] {10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5}),
- Arguments.of(new double[] {8.04, 6.95, 7.58, 8.81, 8.33, 9.96,
7.24, 4.26, 10.84, 4.82, 5.68}),
- Arguments.of(new double[] {9.14, 8.14, 8.74, 8.77, 9.26, 8.10,
6.13, 3.10, 9.13, 7.26, 4.74, 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39,
8.15, 6.42, 5.73}),
- Arguments.of(new double[] {8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8}),
- Arguments.of(new double[] {6.58, 5.76, 7.71, 8.84, 8.47, 7.04,
5.25, 12.50, 5.56, 7.91, 6.89}),
- Arguments.of(new double[] {0, 0, 0.0}),
- Arguments.of(new double[] {1, -7, 6}),
- Arguments.of(new double[] {1, 7, -15, 3}),
- Arguments.of(new double[] {2, 2, 2, 2}),
- Arguments.of(new double[] {2.3}),
- Arguments.of(new double[] {3.14, 2.718, 1.414}),
- Arguments.of(new double[] {12.5, 12.0, 11.8, 14.2, 14.9, 14.5,
21.0, 8.2, 10.3, 11.3, 14.1, 9.9, 12.2, 12.0, 12.1, 11.0, 19.8, 11.0, 10.0,
8.8, 9.0, 12.3}),
- Arguments.of(new double[] {-0.0, +0.0}),
- Arguments.of(new double[] {0.0, -0.0}),
- Arguments.of(new double[] {0.0, +0.0}),
- Arguments.of(new double[] {0.001, 0.0002, 0.00003, 10000.11,
0.000004}),
- Arguments.of(new double[] {10E-50, 5E-100, 25E-200, 35.345E-50}),
- // Overflow of the sum
- Arguments.of(new double[] {Double.MAX_VALUE, Double.MAX_VALUE}),
- Arguments.of(new double[] {-Double.MAX_VALUE, -Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE, 1}),
- Arguments.of(new double[] {-Double.MAX_VALUE, 1, 1}),
- Arguments.of(new double[] {-Double.MAX_VALUE, -1, 1}),
- Arguments.of(new double[] {Double.MAX_VALUE, -1}),
- Arguments.of(new double[] {Double.MAX_VALUE, -Double.MAX_VALUE}),
- Arguments.of(new double[] {1, -Double.MAX_VALUE}),
- Arguments.of(new double[] {1, 1, 1, -Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE, Double.MAX_VALUE /
2}),
- Arguments.of(new double[] {Double.MAX_VALUE, Double.MAX_VALUE,
-Double.MAX_VALUE})
- );
- }
-
@ParameterizedTest
- @MethodSource(value = "testMeanNonFinite")
- void testMeanNonFinite(double[] values, double expected) {
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValuesNonFinite")
+ void testMeanNonFinite(double[] values) {
+ final double expected = Arrays.stream(values)
+ .average()
+ .orElse(Double.NaN);
Mean mean = Mean.create();
for (double value : values) {
mean.accept(value);
@@ -133,9 +97,12 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testMeanNonFinite")
- void testParallelStreamNonFinite(double[] values, double expected) {
- double ans = Arrays.stream(values)
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValuesNonFinite")
+ void testParallelStreamNonFinite(double[] values) {
+ final double expected = Arrays.stream(values)
+ .average()
+ .orElse(Double.NaN);
+ final double ans = Arrays.stream(values)
.parallel()
.collect(Mean::create, Mean::accept, Mean::combine)
.getAsDouble();
@@ -143,45 +110,17 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testMeanNonFinite")
- void testMeanRandomOrderNonFinite(double[] values, double expected) {
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValuesNonFinite")
+ void testMeanRandomOrderNonFinite(double[] values) {
UniformRandomProvider rng = TestHelper.createRNG();
for (int i = 1; i <= 10; i++) {
- testMeanNonFinite(TestHelper.shuffle(rng, values), expected);
- testParallelStreamNonFinite(TestHelper.shuffle(rng, values),
expected);
+ testMeanNonFinite(TestHelper.shuffle(rng, values));
+ testParallelStreamNonFinite(TestHelper.shuffle(rng, values));
}
}
- static Stream<Arguments> testMeanNonFinite() {
- return Stream.of(
- Arguments.of(new double[] {}, Double.NaN),
- Arguments.of(new double[] {Double.POSITIVE_INFINITY,
Double.NEGATIVE_INFINITY},
- Double.NaN),
- Arguments.of(new double[] {Double.POSITIVE_INFINITY,
Double.POSITIVE_INFINITY},
- Double.POSITIVE_INFINITY),
- Arguments.of(new double[] {Double.NEGATIVE_INFINITY,
Double.NEGATIVE_INFINITY},
- Double.NEGATIVE_INFINITY),
- Arguments.of(new double[] {Double.POSITIVE_INFINITY,
Double.MAX_VALUE},
- Double.POSITIVE_INFINITY),
- Arguments.of(new double[] {Double.NEGATIVE_INFINITY,
-Double.MIN_VALUE},
- Double.NEGATIVE_INFINITY),
- Arguments.of(new double[] {Double.NaN, 34.56, 89.74}, Double.NaN),
- Arguments.of(new double[] {34.56, Double.NaN, 89.74}, Double.NaN),
- Arguments.of(new double[] {34.56, 89.74, Double.NaN}, Double.NaN),
- Arguments.of(new double[] {Double.NaN, 3.14, Double.NaN,
Double.NaN},
- Double.NaN),
- Arguments.of(new double[] {Double.NaN, Double.NaN, Double.NaN},
Double.NaN),
- Arguments.of(new double[] {Double.NEGATIVE_INFINITY,
Double.MAX_VALUE},
- Double.NEGATIVE_INFINITY),
- Arguments.of(new double[] {Double.POSITIVE_INFINITY,
Double.POSITIVE_INFINITY,
- Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY},
Double.POSITIVE_INFINITY),
- Arguments.of(new double[] {-Double.MAX_VALUE,
Double.POSITIVE_INFINITY},
- Double.POSITIVE_INFINITY)
- );
- }
-
@ParameterizedTest
- @MethodSource(value = "testCombine")
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombine")
void testCombine(double[] array1, double[] array2) {
final double[] combinedArray = TestHelper.concatenate(array1, array2);
final double expected = computeExpected(combinedArray);
@@ -203,11 +142,11 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testCombine")
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombine")
void testCombineRandomOrder(double[] array1, double[] array2) {
UniformRandomProvider rng = TestHelper.createRNG();
double[] data = TestHelper.concatenate(array1, array2);
- int n = array1.length;
+ final int n = array1.length;
for (int i = 1; i <= 10; i++) {
for (int j = 1; j <= 10; j++) {
TestHelper.shuffle(rng, array1);
@@ -222,12 +161,12 @@ final class MeanTest {
}
@ParameterizedTest
- @MethodSource(value = "testCombine")
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombine")
void testArrayOfArrays(double[] array1, double[] array2) {
final double[] combinedArray = TestHelper.concatenate(array1, array2);
final double expected = computeExpected(combinedArray);
final double[][] values = {array1, array2};
- double actual = Arrays.stream(values)
+ final double actual = Arrays.stream(values)
.map(Mean::of)
.reduce(Mean::combine)
.map(Mean::getAsDouble)
@@ -235,74 +174,50 @@ final class MeanTest {
TestHelper.assertEquals(expected, actual, ULP_COMBINE, () -> "array of
arrays combined mean");
}
- static Stream<Arguments> testCombine() {
- return Stream.of(
- Arguments.of(new double[] {}, new double[] {1}),
- Arguments.of(new double[] {1}, new double[] {}),
- Arguments.of(new double[] {}, new double[] {1, 7, -15, 3}),
- Arguments.of(new double[] {0}, new double[] {0, 0.0}),
- Arguments.of(new double[] {4, 8, -6, 3, 18}, new double[] {1, -7,
6}),
- Arguments.of(new double[] {10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5},
new double[] {8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8}),
- Arguments.of(new double[] {10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5},
new double[] {7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42,
5.73}),
- Arguments.of(new double[] {6.0, -1.32, -5.78, 8.967, 13.32, -9.67,
0.14, 7.321, 11.456, -3.111}, new double[] {2, 2, 2, 2}),
- Arguments.of(new double[] {2.3}, new double[] {-42, 10, -88, 5,
-17}),
- Arguments.of(new double[] {-20, 34.983, -12.745, 28.12, -8.34, 42,
-4, 16}, new double[] {3.14, 2.718, 1.414}),
- Arguments.of(new double[] {12.5, 12.0, 11.8, 14.2, 14.9, 14.5,
21.0, 8.2, 10.3, 11.3, 14.1, 9.9}, new double[] {12.2, 12.0, 12.1, 11.0, 19.8,
11.0, 10.0, 8.8, 9.0, 12.3}),
- Arguments.of(new double[] {-0.0}, new double[] {+0.0}),
- Arguments.of(new double[] {0.0}, new double[] {-0.0}),
- Arguments.of(new double[] {0.0}, new double[] {+0.0}),
- Arguments.of(new double[] {10E-50, 5E-100}, new double[] {25E-200,
35.345E-50}),
- Arguments.of(new double[] {Double.MAX_VALUE}, new double[]
{Double.MAX_VALUE}),
- Arguments.of(new double[] {-Double.MAX_VALUE}, new double[]
{-Double.MAX_VALUE}),
- Arguments.of(new double[] {-Double.MAX_VALUE, 1}, new double[]
{1}),
- Arguments.of(new double[] {Double.MAX_VALUE, 3.1415E153}, new
double[] {}),
- Arguments.of(new double[] {1}, new double[] {-Double.MAX_VALUE}),
- Arguments.of(new double[] {1, 1, 1}, new double[]
{-Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE}, new double[] {1,
1E300}),
- Arguments.of(new double[] {Double.MAX_VALUE}, new double[]
{-Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE}, new double[]
{Double.MAX_VALUE, -Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE, Double.MAX_VALUE},
new double[] {-Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE, Double.MAX_VALUE},
new double[] {-Double.MAX_VALUE, -Double.MAX_VALUE}),
- Arguments.of(new double[] {Double.MAX_VALUE, Double.MAX_VALUE},
new double[] {-Double.MAX_VALUE, -Double.MAX_VALUE, -Double.MAX_VALUE})
- );
- }
-
@ParameterizedTest
- @MethodSource(value = "testCombineNonFinite")
- void testCombineNonFinite(double[][] values, double expected) {
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombineNonFinite")
+ void testCombineNonFinite(double[][] values) {
+ final double expected = Arrays.stream(values)
+ .flatMapToDouble(Arrays::stream)
+ .average()
+ .orElse(Double.NaN);
Mean mean1 = Mean.create();
Mean mean2 = Mean.create();
Arrays.stream(values[0]).forEach(mean1);
Arrays.stream(values[1]).forEach(mean2);
- double mean2BeforeCombine = mean2.getAsDouble();
+ final double mean2BeforeCombine = mean2.getAsDouble();
mean1.combine(mean2);
Assertions.assertEquals(expected, mean1.getAsDouble(), "combine
non-finite");
Assertions.assertEquals(mean2BeforeCombine, mean2.getAsDouble());
}
@ParameterizedTest
- @MethodSource(value = "testCombineNonFinite")
- void testCombineRandomOrderNonFinite(double[][] values, double expected) {
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombineNonFinite")
+ void testCombineRandomOrderNonFinite(double[][] values) {
UniformRandomProvider rng = TestHelper.createRNG();
- double[] data = TestHelper.concatenate(values[0], values[1]);
- int n = values[0].length;
+ final double[] data = TestHelper.concatenate(values[0], values[1]);
+ final int n = values[0].length;
for (int i = 1; i <= 10; i++) {
for (int j = 1; j <= 10; j++) {
TestHelper.shuffle(rng, values[0]);
TestHelper.shuffle(rng, values[1]);
- testCombineNonFinite(values, expected);
+ testCombineNonFinite(values);
}
TestHelper.shuffle(rng, data);
System.arraycopy(data, 0, values[0], 0, n);
System.arraycopy(data, n, values[1], 0, values[1].length);
- testCombineNonFinite(values, expected);
+ testCombineNonFinite(values);
}
}
@ParameterizedTest
- @MethodSource(value = "testCombineNonFinite")
- void testArrayOfArraysNonFinite(double[][] values, double expected) {
- double actual = Arrays.stream(values)
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombineNonFinite")
+ void testArrayOfArraysNonFinite(double[][] values) {
+ final double expected = Arrays.stream(values)
+ .flatMapToDouble(Arrays::stream)
+ .average()
+ .orElse(Double.NaN);
+ final double actual = Arrays.stream(values)
.map(Mean::of)
.reduce(Mean::combine)
.map(Mean::getAsDouble)
@@ -310,30 +225,8 @@ final class MeanTest {
Assertions.assertEquals(expected, actual, "array of arrays combined
mean non-finite");
}
- static Stream<Arguments> testCombineNonFinite() {
- return Stream.of(
- Arguments.of(new double[][] {{}, {}}, Double.NaN),
- Arguments.of(new double[][] {{Double.POSITIVE_INFINITY},
{Double.NEGATIVE_INFINITY}}, Double.NaN),
- Arguments.of(new double[][] {{Double.POSITIVE_INFINITY},
{Double.POSITIVE_INFINITY}}, Double.POSITIVE_INFINITY),
- Arguments.of(new double[][] {{Double.NEGATIVE_INFINITY},
{Double.NEGATIVE_INFINITY}}, Double.NEGATIVE_INFINITY),
- Arguments.of(new double[][] {{Double.POSITIVE_INFINITY},
{Double.MAX_VALUE}}, Double.POSITIVE_INFINITY),
- Arguments.of(new double[][] {{-Double.MAX_VALUE},
{Double.POSITIVE_INFINITY}}, Double.POSITIVE_INFINITY),
- Arguments.of(new double[][] {{Double.NEGATIVE_INFINITY},
{-Double.MIN_VALUE}}, Double.NEGATIVE_INFINITY),
- Arguments.of(new double[][] {{Double.NaN, 34.56, 89.74},
{Double.NaN}}, Double.NaN),
- Arguments.of(new double[][] {{34.56}, {Double.NaN, 89.74}},
Double.NaN),
- Arguments.of(new double[][] {{34.56, 89.74}, {Double.NaN,
Double.NaN}}, Double.NaN),
- Arguments.of(new double[][] {{Double.NaN, 3.14, Double.NaN,
Double.NaN}, {}}, Double.NaN),
- Arguments.of(new double[][] {{Double.NaN, Double.NaN, Double.NaN},
{Double.NaN, Double.NaN, Double.NaN}}, Double.NaN),
- Arguments.of(new double[][] {{Double.NEGATIVE_INFINITY,
-Double.MAX_VALUE, -Double.MIN_VALUE}, {Double.MAX_VALUE, Double.MIN_VALUE}},
Double.NEGATIVE_INFINITY)
- );
- }
-
- // Helper function to compute the expected value of Mean using BigDecimal.
+ // Helper function which computes the expected mean using BigDecimal and converts it
to a double.
private static double computeExpected(double[] values) {
- BigDecimal bd = BigDecimal.ZERO;
- for (double value : values) {
- bd = bd.add(new BigDecimal(value));
- }
- return bd.divide(BigDecimal.valueOf(values.length),
MathContext.DECIMAL128).doubleValue();
+ return TestHelper.computeExpectedMean(values).doubleValue();
}
}
diff --git
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestData.java
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestData.java
new file mode 100644
index 0000000..aa0a4f8
--- /dev/null
+++
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestData.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+import java.util.stream.Stream;
+import org.junit.jupiter.params.provider.Arguments;
+
+/**
+ * Utility class which provides the data for tests in the {@code o.a.c.s.descriptive}
module.
+ */
+final class TestData {
+
+ /** Class contains only static methods. */
+ private TestData() {}
+
+ /**
+ * Function which supplies test data for a statistic as a single array.
+ * @return Stream of 1-d arrays.
+ */
+ static Stream<double[]> testValues() {
+ return Stream.of(
+ new double[] {0.0},
+ new double[] {10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5},
+ new double[] {8.04, 6.95, 7.58, 8.81, 8.33, 9.96, 7.24, 4.26,
10.84, 4.82, 5.68},
+ new double[] {9.14, 8.14, 8.74, 8.77, 9.26, 8.10, 6.13, 3.10,
9.13, 7.26, 4.74, 7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42,
5.73},
+ new double[] {8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8},
+ new double[] {6.58, 5.76, 7.71, 8.84, 8.47, 7.04, 5.25, 12.50,
5.56, 7.91, 6.89},
+ new double[] {0, 0, 0.0},
+ new double[] {1, -7, 6},
+ new double[] {1, 7, -15, 3},
+ new double[] {2, 2, 2, 2},
+ new double[] {2.3},
+ new double[] {3.14, 2.718, 1.414},
+ new double[] {12.5, 12.0, 11.8, 14.2, 14.9, 14.5, 21.0, 8.2, 10.3,
11.3, 14.1, 9.9, 12.2, 12.0, 12.1, 11.0, 19.8, 11.0, 10.0, 8.8, 9.0, 12.3},
+ new double[] {-0.0, +0.0},
+ new double[] {0.0, -0.0},
+ new double[] {0.0, +0.0},
+ new double[] {0.001, 0.0002, 0.00003, 10000.11, 0.000004},
+ new double[] {10E-50, 5E-100, 25E-200, 35.345E-50},
+ // Overflow of the sum
+ new double[] {Double.MAX_VALUE, Double.MAX_VALUE},
+ new double[] {-Double.MAX_VALUE, -Double.MAX_VALUE},
+ new double[] {Double.MAX_VALUE, 1},
+ new double[] {-Double.MAX_VALUE, 1, 1},
+ new double[] {-Double.MAX_VALUE, -1, 1},
+ new double[] {Double.MAX_VALUE, -1},
+ new double[] {Double.MAX_VALUE, -Double.MAX_VALUE},
+ new double[] {1, -Double.MAX_VALUE},
+ new double[] {1, 1, 1, -Double.MAX_VALUE},
+ new double[] {Double.MAX_VALUE, Double.MAX_VALUE / 2},
+ new double[] {Double.MAX_VALUE, Double.MAX_VALUE,
-Double.MAX_VALUE},
+ new double[] {Double.MAX_VALUE, -Double.MAX_VALUE}
+ );
+ }
+
+ /**
+ * Function which supplies test data for a statistic as a single array.
+ * Each case is either empty or contains at least one non-finite value.
+ * @return Stream of 1-d arrays.
+ */
+ static Stream<double[]> testValuesNonFinite() {
+ return Stream.of(
+ new double[]{},
+ new double[]{Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY},
+ new double[]{Double.NaN, 34.56, 89.74},
+ new double[]{34.56, Double.NaN, 89.74},
+ new double[]{34.56, 89.74, Double.NaN},
+ new double[]{Double.NaN, 3.14, Double.NaN, Double.NaN},
+ new double[]{Double.NaN, Double.NaN, Double.NaN},
+ new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY},
+ new double[]{Double.NEGATIVE_INFINITY, Double.NEGATIVE_INFINITY},
+ new double[]{Double.POSITIVE_INFINITY, Double.MAX_VALUE},
+ new double[]{Double.NEGATIVE_INFINITY, -Double.MIN_VALUE},
+ new double[]{Double.NEGATIVE_INFINITY, Double.MAX_VALUE},
+ new double[]{Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY,
Double.POSITIVE_INFINITY, Double.POSITIVE_INFINITY},
+ new double[]{-Double.MAX_VALUE, Double.POSITIVE_INFINITY}
+ );
+ }
+
+ /**
+ * Function which supplies test data for a statistic as a pair of double[]
arrays.
+ * @return Stream of pairs of 1-d arrays.
+ */
+ static Stream<Arguments> testCombine() {
+ return Stream.of(
+ Arguments.of(new double[] {}, new double[] {1}),
+ Arguments.of(new double[] {1}, new double[] {}),
+ Arguments.of(new double[] {}, new double[] {1, 7, -15, 3}),
+ Arguments.of(new double[] {0}, new double[] {0, 0.0}),
+ Arguments.of(new double[] {4, 8, -6, 3, 18}, new double[] {1, -7,
6}),
+ Arguments.of(new double[] {10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5},
new double[] {8, 8, 8, 8, 8, 8, 8, 19, 8, 8, 8}),
+ Arguments.of(new double[] {10, 8, 13, 9, 11, 14, 6, 4, 12, 7, 5},
new double[] {7.46, 6.77, 12.74, 7.11, 7.81, 8.84, 6.08, 5.39, 8.15, 6.42,
5.73}),
+ Arguments.of(new double[] {6.0, -1.32, -5.78, 8.967, 13.32, -9.67,
0.14, 7.321, 11.456, -3.111}, new double[] {2, 2, 2, 2}),
+ Arguments.of(new double[] {2.3}, new double[] {-42, 10, -88, 5,
-17}),
+ Arguments.of(new double[] {-20, 34.983, -12.745, 28.12, -8.34, 42,
-4, 16}, new double[] {3.14, 2.718, 1.414}),
+ Arguments.of(new double[] {12.5, 12.0, 11.8, 14.2, 14.9, 14.5,
21.0, 8.2, 10.3, 11.3, 14.1, 9.9}, new double[] {12.2, 12.0, 12.1, 11.0, 19.8,
11.0, 10.0, 8.8, 9.0, 12.3}),
+ Arguments.of(new double[] {-0.0}, new double[] {+0.0}),
+ Arguments.of(new double[] {0.0}, new double[] {-0.0}),
+ Arguments.of(new double[] {0.0}, new double[] {+0.0}),
+ Arguments.of(new double[] {10E-50, 5E-100}, new double[] {25E-200,
35.345E-50}),
+ Arguments.of(new double[] {Double.MAX_VALUE}, new double[]
{Double.MAX_VALUE}),
+ Arguments.of(new double[] {-Double.MAX_VALUE}, new double[]
{-Double.MAX_VALUE}),
+ Arguments.of(new double[] {-Double.MAX_VALUE, 1}, new double[]
{1}),
+ Arguments.of(new double[] {Double.MAX_VALUE, 3.1415E153}, new
double[] {}),
+ Arguments.of(new double[] {Double.MAX_VALUE}, new double[]
{-Double.MAX_VALUE}),
+ Arguments.of(new double[] {1}, new double[] {-Double.MAX_VALUE}),
+ Arguments.of(new double[] {1, 1, 1}, new double[]
{-Double.MAX_VALUE}),
+ Arguments.of(new double[] {Double.MAX_VALUE}, new double[] {1,
1E300})
+ );
+ }
+
+ /**
+ * Function which supplies test data for a statistic as a double[][] array.
+ * Each case is either empty or contains at least one non-finite value.
+ * @return Stream of 2-d arrays.
+ */
+ static Stream<double[][]> testCombineNonFinite() {
+ return Stream.of(
+ new double[][] {{}, {}},
+ new double[][] {{Double.POSITIVE_INFINITY},
{Double.NEGATIVE_INFINITY}},
+ new double[][] {{Double.NaN, 34.56, 89.74}, {Double.NaN}},
+ new double[][] {{34.56}, {Double.NaN, 89.74}},
+ new double[][] {{34.56, 89.74}, {Double.NaN, Double.NaN}},
+ new double[][] {{Double.NaN, 3.14, Double.NaN, Double.NaN}, {}},
+ new double[][] {{Double.NaN, Double.NaN, Double.NaN}, {Double.NaN,
Double.NaN, Double.NaN}},
+ new double[][] {{Double.POSITIVE_INFINITY},
{Double.POSITIVE_INFINITY}},
+ new double[][] {{Double.NEGATIVE_INFINITY},
{Double.NEGATIVE_INFINITY}},
+ new double[][] {{Double.POSITIVE_INFINITY}, {Double.MAX_VALUE}},
+ new double[][] {{-Double.MAX_VALUE}, {Double.POSITIVE_INFINITY}},
+ new double[][] {{Double.NEGATIVE_INFINITY}, {-Double.MIN_VALUE}},
+ new double[][] {{Double.NEGATIVE_INFINITY, -Double.MAX_VALUE,
-Double.MIN_VALUE},
+ {Double.MAX_VALUE, Double.MIN_VALUE}}
+ );
+ }
+}
diff --git
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java
index 01c48d1..f10192c 100644
---
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java
+++
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/TestHelper.java
@@ -16,6 +16,8 @@
*/
package org.apache.commons.statistics.descriptive;
+import java.math.BigDecimal;
+import java.math.MathContext;
import java.util.Arrays;
import java.util.function.Supplier;
import org.apache.commons.numbers.core.Precision;
@@ -47,6 +49,19 @@ final class TestHelper {
.toArray();
}
+ /**
+ * Helper function to compute the expected value of Mean using BigDecimal.
+ * @param values Values.
+ * @return Mean of values rounded to <a href =
"https://en.wikipedia.org/wiki/Decimal128_floating-point_format"> DECIMAL128
precision</a>.
+ */
+ static BigDecimal computeExpectedMean(double[] values) {
+ BigDecimal bd = BigDecimal.ZERO;
+ for (double value : values) {
+ bd = bd.add(new BigDecimal(value));
+ }
+ return bd.divide(BigDecimal.valueOf(values.length),
MathContext.DECIMAL128);
+ }
+
/**
* Helper function to assert that {@code actual} is equal to {@code
expected} as defined
* by {@link org.apache.commons.numbers.core.Precision#equals(double,
double, int)
@@ -78,8 +93,8 @@ final class TestHelper {
* Format the difference in ULP between two arguments. This will return
"0" for values
* that are binary equal, or for the difference between zeros of opposite
signs.
*
- * @param a first argument
- * @param b second argument
+ * @param expected first argument
+ * @param actual second argument
* @return Signed ULP difference between the arguments as a string
*/
private static String formatUlpDifference(double expected, double actual) {
diff --git
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/VarianceTest.java
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/VarianceTest.java
new file mode 100644
index 0000000..86e9768
--- /dev/null
+++
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/VarianceTest.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.util.Arrays;
+import org.apache.commons.rng.UniformRandomProvider;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test for {@link Variance}.
+ */
+final class VarianceTest {
+ private static final int ULP_ARRAY = 4;
+
+ private static final int ULP_STREAM = 5;
+
+ private static final int ULP_COMBINE_ACCEPT = 6;
+
+ private static final int ULP_COMBINE_OF = 2;
+
+ @Test
+ void testEmpty() {
+ Variance var = Variance.create();
+ Assertions.assertEquals(Double.NaN, var.getAsDouble());
+ }
+
+ @Test
+ void testNaN() {
+ Variance variance = Variance.create();
+ double[] testArray = {Double.NaN, +0.0d, -0.0d,
Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY};
+ for (double value : testArray) {
+ variance.accept(value);
+ }
+ Assertions.assertEquals(Double.NaN, variance.getAsDouble());
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValues")
+ void testVariance(double[] values) {
+ final double expected = computeExpectedVariance(values);
+ Variance var = Variance.create();
+ for (double value : values) {
+ var.accept(value);
+ }
+ TestHelper.assertEquals(expected, var.getAsDouble(), ULP_STREAM, () ->
"variance");
+ TestHelper.assertEquals(expected, Variance.of(values).getAsDouble(),
ULP_ARRAY, () -> "of (values)");
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValues")
+ void testParallelStream(double[] values) {
+ final double expected = computeExpectedVariance(values);
+ final double actual = Arrays.stream(values)
+ .parallel()
+ .collect(Variance::create, Variance::accept, Variance::combine)
+ .getAsDouble();
+ TestHelper.assertEquals(expected, actual, ULP_COMBINE_ACCEPT, () ->
"parallel stream");
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValues")
+ void testVarianceRandomOrder(double[] values) {
+ UniformRandomProvider rng = TestHelper.createRNG();
+ for (int i = 1; i <= 10; i++) {
+ testVariance(TestHelper.shuffle(rng, values));
+ testParallelStream(TestHelper.shuffle(rng, values));
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValuesNonFinite")
+ void testVarianceNonFinite(double[] values) {
+ final double expected = Double.NaN;
+ Variance var = Variance.create();
+ for (double value : values) {
+ var.accept(value);
+ }
+ Assertions.assertEquals(expected, var.getAsDouble(), "variance
non-finite");
+ Assertions.assertEquals(expected, Variance.of(values).getAsDouble(),
"of (values) non-finite");
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValuesNonFinite")
+ void testParallelStreamNonFinite(double[] values) {
+ final double expected = Double.NaN;
+ final double ans = Arrays.stream(values)
+ .parallel()
+ .collect(Variance::create, Variance::accept, Variance::combine)
+ .getAsDouble();
+ Assertions.assertEquals(expected, ans, "parallel stream non-finite");
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testValuesNonFinite")
+ void testVarianceRandomOrderNonFinite(double[] values) {
+ UniformRandomProvider rng = TestHelper.createRNG();
+ for (int i = 1; i <= 10; i++) {
+ testVarianceNonFinite(TestHelper.shuffle(rng, values));
+ testParallelStreamNonFinite(TestHelper.shuffle(rng, values));
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombine")
+ void testCombine(double[] array1, double[] array2) {
+ final double[] combinedArray = TestHelper.concatenate(array1, array2);
+ final double expected = computeExpectedVariance(combinedArray);
+ Variance var1 = Variance.create();
+ Variance var2 = Variance.create();
+ Arrays.stream(array1).forEach(var1);
+ Arrays.stream(array2).forEach(var2);
+ final double var1BeforeCombine = var1.getAsDouble();
+ final double var2BeforeCombine = var2.getAsDouble();
+ var1.combine(var2);
+ TestHelper.assertEquals(expected, var1.getAsDouble(),
ULP_COMBINE_ACCEPT, () -> "combine");
+ Assertions.assertEquals(var2BeforeCombine, var2.getAsDouble());
+ // Combine in reverse order
+ Variance var1b = Variance.create();
+ Arrays.stream(array1).forEach(var1b);
+ var2.combine(var1b);
+ TestHelper.assertEquals(expected, var2.getAsDouble(),
ULP_COMBINE_ACCEPT, () -> "combine");
+ Assertions.assertEquals(var1BeforeCombine, var1b.getAsDouble());
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombine")
+ void testCombineRandomOrder(double[] array1, double[] array2) {
+ UniformRandomProvider rng = TestHelper.createRNG();
+ double[] data = TestHelper.concatenate(array1, array2);
+ final int n = array1.length;
+ for (int i = 1; i <= 10; i++) {
+ for (int j = 1; j <= 10; j++) {
+ TestHelper.shuffle(rng, array1);
+ TestHelper.shuffle(rng, array2);
+ testCombine(array1, array2);
+ }
+ TestHelper.shuffle(rng, data);
+ System.arraycopy(data, 0, array1, 0, n);
+ System.arraycopy(data, n, array2, 0, array2.length);
+ testCombine(array1, array2);
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombine")
+ void testArrayOfArrays(double[] array1, double[] array2) {
+ final double[] combinedArray = TestHelper.concatenate(array1, array2);
+ final double expected = computeExpectedVariance(combinedArray);
+ final double[][] values = {array1, array2};
+ final double actual = Arrays.stream(values)
+ .map(Variance::of)
+ .reduce(Variance::combine)
+ .map(Variance::getAsDouble)
+ .orElseThrow(RuntimeException::new);
+ TestHelper.assertEquals(expected, actual, ULP_COMBINE_OF, () -> "array
of arrays combined variance");
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombineNonFinite")
+ void testCombineNonFinite(double[][] values) {
+ final double expected = Double.NaN;
+ Variance var1 = Variance.create();
+ Variance var2 = Variance.create();
+ Arrays.stream(values[0]).forEach(var1);
+ Arrays.stream(values[1]).forEach(var2);
+ final double mean2BeforeCombine = var2.getAsDouble();
+ var1.combine(var2);
+ Assertions.assertEquals(expected, var1.getAsDouble(), "combine
non-finite");
+ Assertions.assertEquals(mean2BeforeCombine, var2.getAsDouble());
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombineNonFinite")
+ void testCombineRandomOrderNonFinite(double[][] values) {
+ UniformRandomProvider rng = TestHelper.createRNG();
+ final double[] data = TestHelper.concatenate(values[0], values[1]);
+ final int n = values[0].length;
+ for (int i = 1; i <= 10; i++) {
+ for (int j = 1; j <= 10; j++) {
+ TestHelper.shuffle(rng, values[0]);
+ TestHelper.shuffle(rng, values[1]);
+ testCombineNonFinite(values);
+ }
+ TestHelper.shuffle(rng, data);
+ System.arraycopy(data, 0, values[0], 0, n);
+ System.arraycopy(data, n, values[1], 0, values[1].length);
+ testCombineNonFinite(values);
+ }
+ }
+
+ @ParameterizedTest
+ @MethodSource(value =
"org.apache.commons.statistics.descriptive.TestData#testCombineNonFinite")
+ void testArrayOfArraysNonFinite(double[][] values) {
+ final double expected = Double.NaN;
+ final double actual = Arrays.stream(values)
+ .map(Variance::of)
+ .reduce(Variance::combine)
+ .map(Variance::getAsDouble)
+ .orElseThrow(RuntimeException::new);
+ Assertions.assertEquals(expected, actual, "array of arrays combined
variance non-finite");
+ }
+
+ // Helper function to compute the expected value of Variance using
BigDecimal.
+ static double computeExpectedVariance(double[] values) {
+ long n = values.length;
+ if (n == 1) {
+ return 0;
+ }
+ BigDecimal mean = TestHelper.computeExpectedMean(values);
+ BigDecimal bd = BigDecimal.ZERO;
+ for (double value : values) {
+ BigDecimal bdDiff = new BigDecimal(value, MathContext.DECIMAL128);
+ bdDiff = bdDiff.subtract(mean);
+ bdDiff = bdDiff.pow(2);
+ bd = bd.add(bdDiff);
+ }
+ return bd.divide(BigDecimal.valueOf(n - 1),
MathContext.DECIMAL128).doubleValue();
+ }
+}