This is an automated email from the ASF dual-hosted git repository.

aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git


The following commit(s) were added to refs/heads/master by this push:
     new 0db9bff  STATISTICS-71: Add StandardDeviation statistic
0db9bff is described below

commit 0db9bff9ce98f57a53798069118734d9da3c4844
Author: Alex Herbert <[email protected]>
AuthorDate: Sun Oct 15 14:01:35 2023 +0100

    STATISTICS-71: Add StandardDeviation statistic
---
 .../{Variance.java => StandardDeviation.java}      |  43 +++---
 .../commons/statistics/descriptive/Variance.java   |   1 +
 .../descriptive/StandardDeviationTest.java         | 153 +++++++++++++++++++++
 3 files changed, 176 insertions(+), 21 deletions(-)

diff --git 
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
 
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
similarity index 78%
copy from 
commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
copy to 
commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
index f6c62b2..71de8d7 100644
--- 
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
+++ 
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
@@ -17,10 +17,10 @@
 package org.apache.commons.statistics.descriptive;
 
 /**
- * Computes the variance of the available values. Uses the following definition
- * of the <em>sample variance</em>:
+ * Computes the standard deviation of the available values. Uses the following 
definition
+ * of the <em>sample standard deviation</em>:
  *
- * <p>\[ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 \]
+ * <p>\[ \sqrt{ \tfrac{1}{n-1} \sum_{i=1}^n (x_i-\overline{x})^2 } \]
  *
  * <p>where \( \overline{x} \) is the sample mean, and \( n \) is the number 
of samples.
  *
@@ -71,21 +71,22 @@ package org.apache.commons.statistics.descriptive;
  *       <a href="https://doi.org/10.2307/2683386">doi: 10.2307/2683386</a>
  * </ul>
  *
- * @see <a href="https://en.wikipedia.org/wiki/Variance">Variance 
(Wikipedia)</a>
+ * @see <a href="https://en.wikipedia.org/wiki/Standard_deviation">Standard 
deviation (Wikipedia)</a>
+ * @see Variance
  * @since 1.1
  */
-public final class Variance implements DoubleStatistic, 
DoubleStatisticAccumulator<Variance> {
+public final class StandardDeviation implements DoubleStatistic, 
DoubleStatisticAccumulator<StandardDeviation> {
 
     /**
      * An instance of {@link SumOfSquaredDeviations}, which is used to
-     * compute the variance.
+     * compute the standard deviation.
      */
     private final SumOfSquaredDeviations ss;
 
     /**
      * Create an instance.
      */
-    private Variance() {
+    private StandardDeviation() {
         this(new SumOfSquaredDeviations());
     }
 
@@ -94,7 +95,7 @@ public final class Variance implements DoubleStatistic, 
DoubleStatisticAccumulat
      *
      * @param ss Sum of squared deviations.
      */
-    private Variance(SumOfSquaredDeviations ss) {
+    private StandardDeviation(SumOfSquaredDeviations ss) {
         this.ss = ss;
     }
 
@@ -103,25 +104,25 @@ public final class Variance implements DoubleStatistic, 
DoubleStatisticAccumulat
      *
      * <p>The initial result is {@code NaN}.
      *
-     * @return {@code Variance} instance.
+     * @return {@code StandardDeviation} instance.
      */
-    public static Variance create() {
-        return new Variance();
+    public static StandardDeviation create() {
+        return new StandardDeviation();
     }
 
     /**
      * Returns an instance populated using the input {@code values}.
      *
-     * <p>Note: {@code Variance} computed using {@link #accept(double) accept} 
may be
-     * different from this variance.
+     * <p>Note: {@code StandardDeviation} computed using {@link 
#accept(double) accept} may be
+     * different from this standard deviation.
      *
-     * <p>See {@link Variance} for details on the computing algorithm.
+     * <p>See {@link StandardDeviation} for details on the computing algorithm.
      *
      * @param values Values.
-     * @return {@code Variance} instance.
+     * @return {@code StandardDeviation} instance.
      */
-    public static Variance of(double... values) {
-        return new Variance(SumOfSquaredDeviations.of(values));
+    public static StandardDeviation of(double... values) {
+        return new StandardDeviation(SumOfSquaredDeviations.of(values));
     }
 
     /**
@@ -135,11 +136,11 @@ public final class Variance implements DoubleStatistic, 
DoubleStatisticAccumulat
     }
 
     /**
-     * Gets the variance of all input values.
+     * Gets the standard deviation of all input values.
      *
      * <p>When no values have been added, the result is {@code NaN}.
      *
-     * @return variance of all values.
+     * @return standard deviation of all values.
      */
     @Override
     public double getAsDouble() {
@@ -152,11 +153,11 @@ public final class Variance implements DoubleStatistic, 
DoubleStatisticAccumulat
         }
         final long n = ss.n;
         // Avoid a divide by zero
-        return n == 1 ? 0 : m2 / (n - 1.0);
+        return n == 1 ? 0 : Math.sqrt(m2 / (n - 1.0));
     }
 
     @Override
-    public Variance combine(Variance other) {
+    public StandardDeviation combine(StandardDeviation other) {
         ss.combine(other.ss);
         return this;
     }
diff --git 
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
 
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
index f6c62b2..08f03e2 100644
--- 
a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
+++ 
b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
@@ -72,6 +72,7 @@ package org.apache.commons.statistics.descriptive;
  * </ul>
  *
  * @see <a href="https://en.wikipedia.org/wiki/Variance">Variance 
(Wikipedia)</a>
+ * @see StandardDeviation
  * @since 1.1
  */
 public final class Variance implements DoubleStatistic, 
DoubleStatisticAccumulator<Variance> {
diff --git 
a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java
 
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java
new file mode 100644
index 0000000..ae45a0c
--- /dev/null
+++ 
b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/StandardDeviationTest.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+import java.util.Arrays;
+import java.util.stream.Stream;
+import org.apache.commons.statistics.distribution.DoubleTolerance;
+import org.apache.commons.statistics.distribution.DoubleTolerances;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test for {@link StandardDeviation}.
+ * This test is based on the {@link VarianceTest}. The standard deviation is
+ * tested to be consistent with the square root of the variance.
+ */
+final class StandardDeviationTest extends 
BaseDoubleStatisticTest<StandardDeviation> {
+
+    @Override
+    protected StandardDeviation create() {
+        return StandardDeviation.create();
+    }
+
+    @Override
+    protected StandardDeviation create(double... values) {
+        return StandardDeviation.of(values);
+    }
+
+    @Override
+    protected double getEmptyValue() {
+        return Double.NaN;
+    }
+
+    @Override
+    protected double getExpectedValue(double[] values) {
+        return computeExpectedStandardDeviation(values);
+    }
+
+    @Override
+    protected double getExpectedNonFiniteValue(double[] values) {
+        // Not supported
+        return Double.NaN;
+    }
+
+    // Re-use tolerances from the VarianceTest
+
+    @Override
+    protected DoubleTolerance getToleranceAccept() {
+        return DoubleTolerances.ulps(15);
+    }
+
+    @Override
+    protected DoubleTolerance getToleranceArray() {
+        return DoubleTolerances.ulps(10);
+    }
+
+    @Override
+    protected DoubleTolerance getToleranceAcceptAndCombine() {
+        return DoubleTolerances.ulps(15);
+    }
+
+    @Override
+    protected DoubleTolerance getToleranceArrayAndCombine() {
+        return DoubleTolerances.ulps(10);
+    }
+
+    @Override
+    protected Stream<StatisticTestData> streamTestData() {
+        final Stream.Builder<StatisticTestData> builder = Stream.builder();
+        TestData.momentTestData().forEach(x -> builder.accept(addCase(x)));
+        // Non-finite sum-of-squared deviations
+        builder.accept(addReference(Double.NaN, DoubleTolerances.equals(), 0, 
0x1.0p1023));
+        // Python Numpy v1.25.1: numpy.std(x, ddof=1)
+        builder.accept(addReference(1.2909944487358056, 
DoubleTolerances.ulps(2), 1, 2, 3, 4));
+        builder.accept(addReference(2.73030134866931, 
DoubleTolerances.ulps(10),
+            14, 8, 11, 10, 7, 9, 10, 11, 10, 15, 5, 10));
+        final double[] a = new double[2 * 512 * 512];
+        Arrays.fill(a, 0, a.length / 2, 1.0);
+        Arrays.fill(a, a.length / 2, a.length, 0.1);
+        // Note: if ddof=0 the std.dev. is sqrt(((1-0.55)**2 + 
(0.1-0.55)**2)/2) = 0.45
+        builder.accept(addReference(0.4500004291540563, 
createRelTolerance(1e-11), a));
+        // R v4.3.1: sd(x)
+        builder.accept(addReference(3.0276503540974917, 
DoubleTolerances.ulps(2), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10));
+        builder.accept(addReference(13.369741957120938, 
DoubleTolerances.ulps(2), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 50));
+        return builder.build();
+    }
+
+    @ParameterizedTest
+    @MethodSource("testAccept")
+    void testConsistentWithVarianceAccept(double[] values) {
+        final double variance = Statistics.add(Variance.create(), 
values).getAsDouble();
+        final double std = Statistics.add(StandardDeviation.create(), 
values).getAsDouble();
+        Assertions.assertEquals(Math.sqrt(variance), std);
+    }
+
+    @ParameterizedTest
+    @MethodSource("testArray")
+    void testConsistentWithVarianceArray(double[] values) {
+        final double variance = Variance.of(values).getAsDouble();
+        final double std = StandardDeviation.of(values).getAsDouble();
+        Assertions.assertEquals(Math.sqrt(variance), std);
+    }
+
+    @ParameterizedTest
+    @MethodSource("testAcceptAndCombine")
+    void testConsistentWithVarianceCombine(double[][] values) {
+        // Assume the sequential stream will combine in the same order.
+        // Do not use a parallel stream which may be stochastic.
+        final double variance = Arrays.stream(values)
+            .map(Variance::of)
+            .reduce(Variance::combine)
+            .orElseGet(Variance::create)
+            .getAsDouble();
+        final double std = Arrays.stream(values)
+            .map(StandardDeviation::of)
+            .reduce(StandardDeviation::combine)
+            .orElseGet(StandardDeviation::create)
+            .getAsDouble();
+        Assertions.assertEquals(Math.sqrt(variance), std);
+    }
+
+    /**
+     * Helper function to compute the expected standard deviation.
+     *
+     * @param values Values.
+     * @return Standard deviation of values
+     */
+    private static double computeExpectedStandardDeviation(double[] values) {
+        long n = values.length;
+        if (n == 0) {
+            return Double.NaN;
+        }
+        if (n == 1) {
+            return 0;
+        }
+        return Math.sqrt(VarianceTest.computeExpectedVariance(values, null));
+    }
+}

Reply via email to