Repository: commons-math Updated Branches: refs/heads/MATH_3_X 9f1368715 -> 5511eec3b
[MATH-837] Support aggregation of any kind of StatisticalSummary in AggregateSummaryStatistics. Project: http://git-wip-us.apache.org/repos/asf/commons-math/repo Commit: http://git-wip-us.apache.org/repos/asf/commons-math/commit/5511eec3 Tree: http://git-wip-us.apache.org/repos/asf/commons-math/tree/5511eec3 Diff: http://git-wip-us.apache.org/repos/asf/commons-math/diff/5511eec3 Branch: refs/heads/MATH_3_X Commit: 5511eec3b4dff7e50c6d662b32749b25d18c5b91 Parents: 9f13687 Author: Thomas Neidhart <[email protected]> Authored: Mon Oct 19 21:36:15 2015 +0200 Committer: Thomas Neidhart <[email protected]> Committed: Mon Oct 19 21:36:15 2015 +0200 ---------------------------------------------------------------------- src/changes/changes.xml | 4 ++ .../descriptive/AggregateSummaryStatistics.java | 12 +++--- .../AggregateSummaryStatisticsTest.java | 40 ++++++++++++++++++-- 3 files changed, 48 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/changes/changes.xml ---------------------------------------------------------------------- diff --git a/src/changes/changes.xml b/src/changes/changes.xml index 1d0de9a..8d466fd 100644 --- a/src/changes/changes.xml +++ b/src/changes/changes.xml @@ -51,6 +51,10 @@ If the output is not quite correct, check for invisible trailing spaces! </properties> <body> <release version="3.6" date="XXXX-XX-XX" description=""> + <action dev="tn" type="add" issue="MATH-837"> + "AggregateSummaryStatistics" can now aggregate any kind of + "StatisticalSummary". + </action> <action dev="erans" type="fix" issue="MATH-1279"> Check precondition (class "o.a.c.m.random.EmpiricalDistribution"). </action> http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java index ebc3e33..b1ccce5 100644 --- a/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java +++ b/src/main/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatistics.java @@ -302,20 +302,21 @@ public class AggregateSummaryStatistics implements StatisticalSummary, * @param statistics collection of SummaryStatistics to aggregate * @return summary statistics for the combined dataset */ - public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) { + public static StatisticalSummaryValues aggregate(Collection<? extends StatisticalSummary> statistics) { if (statistics == null) { return null; } - Iterator<SummaryStatistics> iterator = statistics.iterator(); + Iterator<? extends StatisticalSummary> iterator = statistics.iterator(); if (!iterator.hasNext()) { return null; } - SummaryStatistics current = iterator.next(); + StatisticalSummary current = iterator.next(); long n = current.getN(); double min = current.getMin(); double sum = current.getSum(); double max = current.getMax(); - double m2 = current.getSecondMoment(); + double var = current.getVariance(); + double m2 = var * (n - 1d); double mean = current.getMean(); while (iterator.hasNext()) { current = iterator.next(); @@ -331,7 +332,8 @@ public class AggregateSummaryStatistics implements StatisticalSummary, n += curN; final double meanDiff = current.getMean() - mean; mean = sum / n; - m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n; + final double curM2 = current.getVariance() * (curN - 1d); + m2 = m2 + curM2 + meanDiff * meanDiff * oldN * curN / n; } final double variance; if (n == 0) { http://git-wip-us.apache.org/repos/asf/commons-math/blob/5511eec3/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java b/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java index ce48dbe..2db1ee4 100644 --- a/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java +++ b/src/test/java/org/apache/commons/math3/stat/descriptive/AggregateSummaryStatisticsTest.java @@ -33,7 +33,6 @@ import org.junit.Test; /** * Test cases for {@link AggregateSummaryStatistics} - * */ public class AggregateSummaryStatisticsTest { @@ -129,7 +128,6 @@ public class AggregateSummaryStatisticsTest { * partition and comparing the result of aggregate(...) applied to the collection * of per-partition SummaryStatistics with a single SummaryStatistics computed * over the full sample. - * */ @Test public void testAggregate() { @@ -163,6 +161,42 @@ public class AggregateSummaryStatisticsTest { assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12); } + /** + * Similar to {@link #testAggregate()} but operating on + * {@link StatisticalSummary} instead. + */ + @Test + public void testAggregateStatisticalSummary() { + + // Generate a random sample and random partition + double[] totalSample = generateSample(); + double[][] subSamples = generatePartition(totalSample); + int nSamples = subSamples.length; + + // Compute combined stats directly + SummaryStatistics totalStats = new SummaryStatistics(); + for (int i = 0; i < totalSample.length; i++) { + totalStats.addValue(totalSample[i]); + } + + // Now compute subsample stats individually and aggregate + SummaryStatistics[] subSampleStats = new SummaryStatistics[nSamples]; + for (int i = 0; i < nSamples; i++) { + subSampleStats[i] = new SummaryStatistics(); + } + Collection<StatisticalSummary> aggregate = new ArrayList<StatisticalSummary>(); + for (int i = 0; i < nSamples; i++) { + for (int j = 0; j < subSamples[i].length; j++) { + subSampleStats[i].addValue(subSamples[i][j]); + } + aggregate.add(subSampleStats[i].getSummary()); + } + + // Compare values + StatisticalSummary aggregatedStats = AggregateSummaryStatistics.aggregate(aggregate); + assertEquals(totalStats.getSummary(), aggregatedStats, 10E-12); + } + @Test public void testAggregateDegenerate() { @@ -266,7 +300,7 @@ public class AggregateSummaryStatisticsTest { final double[][] out = new double[5][]; int cur = 0; // beginning of current partition segment int offset = 0; // end of current partition segment - int sampleCount = 0; // number of segments defined + int sampleCount = 0; // number of segments defined for (int i = 0; i < 5; i++) { if (cur == length || offset == length) { break;
