This is an automated email from the ASF dual-hosted git repository.
sijie pushed a commit to branch branch-4.9
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/branch-4.9 by this push:
new 7f7610b ISSUE #2053: Bugfix for Percentile Calculation in
FastCodahale Timer Implementation
7f7610b is described below
commit 7f7610b741d29f0fe44332127028c53fb8627f18
Author: Nicolas Michael <[email protected]>
AuthorDate: Tue Apr 9 18:44:52 2019 -0700
ISSUE #2053: Bugfix for Percentile Calculation in FastCodahale Timer
Implementation
This bugfix for the FastCodahale timer implementation ensures that
percentiles provided by a FastSnapshot are calculated correctly even if the
total count of events (provided by FastTimer) is out of sync with the recorded
events in the percentile buckets.
### Motivation
FastCodahale Timer implementation may miscalculate percentiles if snapshots
of values are slightly out of sync, and if only few events have been recorded.
FastCodahale Timers use fine-grained locking and are meant to tolerate that
(some) values change while being recorded or while snapshots are created.
Currently, the total count of requests is not synchronized with the number of
requests recorded in percentile buckets. If a snapshot is created while the
total count of the timer has been incremented beyond the sum of values in the
percentile buckets, the percentile calculation may produce wrong values.
For example, if 3 percentile values have been recorded, but the overall
count is 4, then the percentile calculation would be based on 4 values. This
becomes most obvious if a percentile > .75 (e.g. p95) is being calculated. For
this, the implementation will try to find 0.95 * 4 values, which is more than
the 3 values recorded in the buckets. Since no bucket fulfills the criteria,
the bound of the last (overflow) bucket will be returned, i.e. Long.MAX_VALUE.
### Changes
FastSnapshots now bases the percentile calculation on the sum of values in
the percentile buckets rather than a count provided by the caller (i.e.
FastTimer). This ensures that percentiles are calculated correctly without the
need of having all counters fully synchronized.
Master Issue: #2053
Reviewers: Jia Zhai <[email protected]>, Sijie Guo <[email protected]>
This closes #2054 from nicmichael/fast-codahale-bugfix, closes #2053
(cherry picked from commit 848f8527f9d4745753b2f1e22ac1b8e981ea0d1d)
Signed-off-by: Sijie Guo <[email protected]>
---
.../bookkeeper/stats/codahale/FastSnapshot.java | 19 ++++++++++++++++--
.../bookkeeper/stats/codahale/FastTimerTest.java | 23 ++++++++++++++++++++++
2 files changed, 40 insertions(+), 2 deletions(-)
diff --git
a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
index ee6466e..f6ceb88 100644
---
a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
+++
b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
@@ -32,6 +32,7 @@ public class FastSnapshot extends Snapshot {
private final long max;
private final long sum;
private final long cnt;
+ private final long pcnt;
private final long[] values;
@SuppressFBWarnings(
@@ -43,18 +44,19 @@ public class FastSnapshot extends Snapshot {
this.max = max;
this.sum = sum;
this.cnt = cnt;
+ this.pcnt = values != null ? sumOf(values) : 0;
this.values = values;
}
@Override
public double getValue(double quantile) {
- if (cnt == 0 || values == null) {
+ if (pcnt == 0 || values == null) {
return 0;
}
long qcnt = 0;
for (int i = 0; i < values.length; i++) {
qcnt += values[i];
- if (((double) qcnt) / ((double) cnt) > quantile) {
+ if (((double) qcnt) / ((double) pcnt) > quantile) {
return timer.getBucketBound(i);
}
}
@@ -105,4 +107,17 @@ public class FastSnapshot extends Snapshot {
// values in this snapshot represent percentile buckets, but not
discrete values
}
+ /**
+ * Calculates the sum of values of an array.
+ * @param a an array of values
+ * @return the sum of all array values
+ */
+ private long sumOf(long[] a) {
+ long sum = 0;
+ for (long x : a) {
+ sum += x;
+ }
+ return sum;
+ }
+
}
\ No newline at end of file
diff --git
a/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
b/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
index 34ae5c3..b3a744f 100644
---
a/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
+++
b/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
@@ -216,4 +216,27 @@ public class FastTimerTest {
assertEquals("FastSnapshot.getMean()", 10, (int)
Math.round(s.getMean() / 1000000));
}
+ @Test
+ public void testSnapshotOutOfSync() {
+ FastTimer t = getMockedFastTimer(1, FastTimer.Buckets.fine);
+ t.update(t.getBucketBound(0) - 1, TimeUnit.NANOSECONDS); // add value
to 1st bucket
+ t.update(t.getBucketBound(1) - 1, TimeUnit.NANOSECONDS); // add value
to 2nd bucket
+ t.update(t.getBucketBound(2) - 1, TimeUnit.NANOSECONDS); // add value
to 3rd bucket
+ incSec(); // advance mocked time to next second
+ Snapshot s1 = t.getSnapshot();
+ long[] buckets = new long[t.getNumberOfBuckets()];
+ buckets[0] = 1;
+ buckets[1] = 1;
+ buckets[2] = 1;
+ Snapshot s2 = new FastSnapshot(t,
+ t.getBucketBound(0) - 1,
+ t.getBucketBound(2) - 1,
+ t.getBucketBound(0) + t.getBucketBound(1) +
t.getBucketBound(2) + 3,
+ 4, // count (4) is out of sync with number of recorded events
in buckets (3)
+ buckets);
+ assertEquals("FastSnapshot.getMin()", s1.getMin(), s2.getMin());
+ assertEquals("FastSnapshot.getMax()", s1.getMax(), s2.getMax());
+ assertEquals("FastSnapshot.getValue(0.95)", (long) s1.getValue(0.95),
(long) s2.getValue(0.95));
+ }
+
}