Repository: hadoop Updated Branches: refs/heads/branch-2 6cdcab907 -> b245e9ce2
HADOOP-13804. MutableStat mean loses accuracy if add(long, long) is used. Contributed by Erik Krogen. (cherry picked from commit 3dbad5d823b8bf61b643dd1057165044138b99e0) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b245e9ce Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b245e9ce Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b245e9ce Branch: refs/heads/branch-2 Commit: b245e9ce2f20bb84690bffe902a60d5e96130cdb Parents: 6cdcab9 Author: Zhe Zhang <z...@apache.org> Authored: Mon Nov 7 16:08:10 2016 -0800 Committer: Zhe Zhang <z...@apache.org> Committed: Mon Nov 7 16:11:27 2016 -0800 ---------------------------------------------------------------------- .../apache/hadoop/metrics2/lib/MutableStat.java | 4 ++++ .../apache/hadoop/metrics2/util/SampleStat.java | 19 +++++++++++++++---- .../hadoop/metrics2/lib/TestMutableMetrics.java | 17 +++++++++++++++++ 3 files changed, 36 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/b245e9ce/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java index 5108624..132f57c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/lib/MutableStat.java @@ -102,6 +102,10 @@ public class MutableStat extends MutableMetric { /** * Add a number of samples and their sum to the running stat + * + * Note that although use of this method will preserve accurate mean values, + * large values for numSamples may result in inaccurate variance values due + * to the use of a single step of the Welford method for variance calculation. * @param numSamples number of samples * @param sum of the samples */ http://git-wip-us.apache.org/repos/asf/hadoop/blob/b245e9ce/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java index cd9aaa4..23abfc4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/util/SampleStat.java @@ -27,29 +27,32 @@ import org.apache.hadoop.classification.InterfaceAudience; public class SampleStat { private final MinMax minmax = new MinMax(); private long numSamples = 0; - private double a0, a1, s0, s1; + private double a0, a1, s0, s1, total; /** * Construct a new running sample stat */ public SampleStat() { a0 = s0 = 0.0; + total = 0.0; } public void reset() { numSamples = 0; a0 = s0 = 0.0; + total = 0.0; minmax.reset(); } // We want to reuse the object, sometimes. void reset(long numSamples, double a0, double a1, double s0, double s1, - MinMax minmax) { + double total, MinMax minmax) { this.numSamples = numSamples; this.a0 = a0; this.a1 = a1; this.s0 = s0; this.s1 = s1; + this.total = total; this.minmax.reset(minmax); } @@ -58,7 +61,7 @@ public class SampleStat { * @param other the destination to hold our values */ public void copyTo(SampleStat other) { - other.reset(numSamples, a0, a1, s0, s1, minmax); + other.reset(numSamples, a0, a1, s0, s1, total, minmax); } /** @@ -80,6 +83,7 @@ public class SampleStat { */ public SampleStat add(long nSamples, double x) { numSamples += nSamples; + total += x; if (numSamples == 1) { a0 = a1 = x; @@ -103,10 +107,17 @@ public class SampleStat { } /** + * @return the total of all samples added + */ + public double total() { + return total; + } + + /** * @return the arithmetic mean of the samples */ public double mean() { - return numSamples > 0 ? a1 : 0.0; + return numSamples > 0 ? (total / numSamples) : 0.0; } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/b245e9ce/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java index e9f4e10..9161df5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/lib/TestMutableMetrics.java @@ -130,6 +130,23 @@ public class TestMutableMetrics { } /** + * Tests that when using {@link MutableStat#add(long, long)}, even with a high + * sample count, the mean does not lose accuracy. + */ + @Test public void testMutableStatWithBulkAdd() { + MetricsRecordBuilder rb = mockMetricsRecordBuilder(); + MetricsRegistry registry = new MetricsRegistry("test"); + MutableStat stat = registry.newStat("Test", "Test", "Ops", "Val", false); + + stat.add(1000, 1000); + stat.add(1000, 2000); + registry.snapshot(rb, false); + + assertCounter("TestNumOps", 2000L, rb); + assertGauge("TestAvgVal", 1.5, rb); + } + + /** * Ensure that quantile estimates from {@link MutableQuantiles} are within * specified error bounds. */ --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org