This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
The following commit(s) were added to refs/heads/master by this push:
new 0db9bff STATISTICS-71: Add StandardDeviation statistic
0db9bff is described below
commit 0db9bff9ce98f57a53798069118734d9da3c4844
Author: Alex Herbert <[email protected]>
AuthorDate: Sun Oct 15 14:01:35 2023 +0100
STATISTICS-71: Add StandardDeviation statistic
---
.../{Variance.java => StandardDeviation.java} | 43 +++---
.../commons/statistics/descriptive/Variance.java | 1 +
.../descriptive/StandardDeviationTest.java | 153 +++++++++++++++++++++
3 files changed, 176 insertions(+), 21 deletions(-)
diff --git
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
similarity index 78%
copy from
commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
copy to
commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
index f6c62b2..71de8d7 100644
---
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
+++
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
@@ -17,10 +17,10 @@
package org.apache.commons.statistics.descriptive;
/**
- * Computes the variance of the available values. Uses the following definition
- * of the <em>sample variance</em>:
+ * Computes the standard deviation of the available values. Uses the following
definition
+ * of the <em>sample standard deviation</em>:
*
- * <p>\[ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 \]
+ * <p>\[ \sqrt{ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 } \]
*
* <p>where \( \overline{x} \) is the sample mean, and \( n \) is the number
of samples.
*
@@ -71,21 +71,22 @@ package org.apache.commons.statistics.descriptive;
* <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a>
* </ul>
*
- * @see <a href="https://en.wikipedia.org/wiki/Variance">Variance
(Wikipedia)</a>
+ * @see <a href="https://en.wikipedia.org/wiki/Standard_deviation">Standard
deviation (Wikipedia)</a>
+ * @see Variance
* @since 1.1
*/
-public final class Variance implements DoubleStatistic,
DoubleStatisticAccumulator<Variance> {
+public final class StandardDeviation implements DoubleStatistic,
DoubleStatisticAccumulator<StandardDeviation> {
/**
* An instance of {@link SumOfSquaredDeviations}, which is used to
- * compute the variance.
+ * compute the standard deviation.
*/
private final SumOfSquaredDeviations ss;
/**
* Create an instance.
*/
- private Variance() {
+ private StandardDeviation() {
this(new SumOfSquaredDeviations());
}
@@ -94,7 +95,7 @@ public final class Variance implements DoubleStatistic,
DoubleStatisticAccumulat
*
* @param ss Sum of squared deviations.
*/
- private Variance(SumOfSquaredDeviations ss) {
+ private StandardDeviation(SumOfSquaredDeviations ss) {
this.ss = ss;
}
@@ -103,25 +104,25 @@ public final class Variance implements DoubleStatistic,
DoubleStatisticAccumulat
*
* <p>The initial result is {@code NaN}.
*
- * @return {@code Variance} instance.
+ * @return {@code StandardDeviation} instance.
*/
- public static Variance create() {
- return new Variance();
+ public static StandardDeviation create() {
+ return new StandardDeviation();
}
/**
* Returns an instance populated using the input {@code values}.
*
- * <p>Note: {@code Variance} computed using {@link #accept(double) accept}
may be
- * different from this variance.
+ * <p>Note: {@code StandardDeviation} computed using {@link
#accept(double) accept} may be
+ * different from this standard deviation.
*
- * <p>See {@link Variance} for details on the computing algorithm.
+ * <p>See {@link StandardDeviation} for details on the computing algorithm.
*
* @param values Values.
- * @return {@code Variance} instance.
+ * @return {@code StandardDeviation} instance.
*/
- public static Variance of(double... values) {
- return new Variance(SumOfSquaredDeviations.of(values));
+ public static StandardDeviation of(double... values) {
+ return new StandardDeviation(SumOfSquaredDeviations.of(values));
}
/**
@@ -135,11 +136,11 @@ public final class Variance implements DoubleStatistic,
DoubleStatisticAccumulat
}
/**
- * Gets the variance of all input values.
+ * Gets the standard deviation of all input values.
*
* <p>When no values have been added, the result is {@code NaN}.
*
- * @return variance of all values.
+ * @return standard deviation of all values.
*/
@Override
public double getAsDouble() {
@@ -152,11 +153,11 @@ public final class Variance implements DoubleStatistic,
DoubleStatisticAccumulat
}
final long n = ss.n;
// Avoid a divide by zero
- return n == 1 ? 0 : m2 / (n - 1.0);
+ return n == 1 ? 0 : Math.sqrt(m2 / (n - 1.0));
}
@Override
- public Variance combine(Variance other) {
+ public StandardDeviation combine(StandardDeviation other) {
ss.combine(other.ss);
return this;
}
diff --git
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
index f6c62b2..08f03e2 100644
---
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
+++
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
@@ -72,6 +72,7 @@ package org.apache.commons.statistics.descriptive;
* </ul>
*
* @see <a href="https://en.wikipedia.org/wiki/Variance">Variance
(Wikipedia)</a>
+ * @see StandardDeviation
* @since 1.1
*/
public final class Variance implements DoubleStatistic,
DoubleStatisticAccumulator<Variance> {
diff --git
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java
new file mode 100644
index 0000000..ae45a0c
--- /dev/null
+++
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+import java.util.Arrays;
+import java.util.stream.Stream;
+import org.apache.commons.statistics.distribution.DoubleTolerance;
+import org.apache.commons.statistics.distribution.DoubleTolerances;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test for {@link StandardDeviation}.
+ * This test is based on the {@link VarianceTest}. The standard deviation is
+ * tested to be consistent with the square root of the variance.
+ */
+final class StandardDeviationTest extends
BaseDoubleStatisticTest<StandardDeviation> {
+
+ @Override
+ protected StandardDeviation create() {
+ return StandardDeviation.create();
+ }
+
+ @Override
+ protected StandardDeviation create(double... values) {
+ return StandardDeviation.of(values);
+ }
+
+ @Override
+ protected double getEmptyValue() {
+ return Double.NaN;
+ }
+
+ @Override
+ protected double getExpectedValue(double[] values) {
+ return computeExpectedStandardDeviation(values);
+ }
+
+ @Override
+ protected double getExpectedNonFiniteValue(double[] values) {
+ // Not supported
+ return Double.NaN;
+ }
+
+ // Re-use tolerances from the VarianceTest
+
+ @Override
+ protected DoubleTolerance getToleranceAccept() {
+ return DoubleTolerances.ulps(15);
+ }
+
+ @Override
+ protected DoubleTolerance getToleranceArray() {
+ return DoubleTolerances.ulps(10);
+ }
+
+ @Override
+ protected DoubleTolerance getToleranceAcceptAndCombine() {
+ return DoubleTolerances.ulps(15);
+ }
+
+ @Override
+ protected DoubleTolerance getToleranceArrayAndCombine() {
+ return DoubleTolerances.ulps(10);
+ }
+
+ @Override
+ protected Stream<StatisticTestData> streamTestData() {
+ final Stream.Builder<StatisticTestData> builder = Stream.builder();
+ TestData.momentTestData().forEach(x -> builder.accept(addCase(x)));
+ // Non-finite sum-of-squared deviations
+ builder.accept(addReference(Double.NaN, DoubleTolerances.equals(), 0,
0x1.0p1023));
+ // Python Numpy v1.25.1: numpy.std(x, ddof=1)
+ builder.accept(addReference(1.2909944487358056,
DoubleTolerances.ulps(2), 1, 2, 3, 4));
+ builder.accept(addReference(2.73030134866931,
DoubleTolerances.ulps(10),
+ 14, 8, 11, 10, 7, 9, 10, 11, 10, 15, 5, 10));
+ final double[] a = new double[2 * 512 * 512];
+ Arrays.fill(a, 0, a.length / 2, 1.0);
+ Arrays.fill(a, a.length / 2, a.length, 0.1);
+ // Note: if ddof=0 the std.dev. is sqrt(((1-0.55)**2 +
(0.1-0.55)**2)/2) = 0.45
+ builder.accept(addReference(0.4500004291540563,
createRelTolerance(1e-11), a));
+ // R v4.3.1: sd(x)
+ builder.accept(addReference(3.0276503540974917,
DoubleTolerances.ulps(2), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
+ builder.accept(addReference(13.369741957120938,
DoubleTolerances.ulps(2), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50));
+ return builder.build();
+ }
+
+ @ParameterizedTest
+ @MethodSource("testAccept")
+ void testConsistentWithVarianceAccept(double[] values) {
+ final double variance = Statistics.add(Variance.create(),
values).getAsDouble();
+ final double std = Statistics.add(StandardDeviation.create(),
values).getAsDouble();
+ Assertions.assertEquals(Math.sqrt(variance), std);
+ }
+
+ @ParameterizedTest
+ @MethodSource("testArray")
+ void testConsistentWithVarianceArray(double[] values) {
+ final double variance = Variance.of(values).getAsDouble();
+ final double std = StandardDeviation.of(values).getAsDouble();
+ Assertions.assertEquals(Math.sqrt(variance), std);
+ }
+
+ @ParameterizedTest
+ @MethodSource("testAcceptAndCombine")
+ void testConsistentWithVarianceCombine(double[][] values) {
+ // Assume the sequential stream will combine in the same order.
+ // Do not use a parallel stream which may be stochastic.
+ final double variance = Arrays.stream(values)
+ .map(Variance::of)
+ .reduce(Variance::combine)
+ .orElseGet(Variance::create)
+ .getAsDouble();
+ final double std = Arrays.stream(values)
+ .map(StandardDeviation::of)
+ .reduce(StandardDeviation::combine)
+ .orElseGet(StandardDeviation::create)
+ .getAsDouble();
+ Assertions.assertEquals(Math.sqrt(variance), std);
+ }
+
+ /**
+ * Helper function to compute the expected standard deviation.
+ *
+ * @param values Values.
+ * @return Standard deviation of values
+ */
+ private static double computeExpectedStandardDeviation(double[] values) {
+ long n = values.length;
+ if (n == 0) {
+ return Double.NaN;
+ }
+ if (n == 1) {
+ return 0;
+ }
+ return Math.sqrt(VarianceTest.computeExpectedVariance(values, null));
+ }
+}