This is an automated email from the ASF dual-hosted git repository.

sijie pushed a commit to branch branch-4.9
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/branch-4.9 by this push:
     new 7f7610b  ISSUE #2053: Bugfix for Percentile Calculation in 
FastCodahale Timer Implementation
7f7610b is described below

commit 7f7610b741d29f0fe44332127028c53fb8627f18
Author: Nicolas Michael <[email protected]>
AuthorDate: Tue Apr 9 18:44:52 2019 -0700

    ISSUE #2053: Bugfix for Percentile Calculation in FastCodahale Timer 
Implementation
    
    This bugfix for the FastCodahale timer implementation ensures that 
percentiles provided by a FastSnapshot are calculated correctly even if the 
total count of events (provided by FastTimer) is out of sync with the recorded 
events in the percentile buckets.
    
    ### Motivation
    
    FastCodahale Timer implementation may miscalculate percentiles if snapshots 
of values are slightly out of sync, and if only few events have been recorded.
    
    FastCodahale Timers use fine-grained locking and are meant to tolerate that 
(some) values change while being recorded or while snapshots are created. 
Currently, the total count of requests is not synchronized with the number of 
requests recorded in percentile buckets. If a snapshot is created while the 
total count of the timer has been incremented beyond the sum of values in the 
percentile buckets, the percentile calculation may produce wrong values.
    
    For example, if 3 percentile values have been recorded, but the overall 
count is 4, then the percentile calculation would be based on 4 values. This 
becomes most obvious if a percentile > .75 (e.g. p95) is being calculated. For 
this, the implementation will try to find 0.95 * 4 values, which is more than 
the 3 values recorded in the buckets. Since no bucket fulfills the criteria, 
the bound of the last (overflow) bucket will be returned, i.e. Long.MAX_VALUE.
    
    ### Changes
    
    FastSnapshots now bases the percentile calculation on the sum of values in 
the percentile buckets rather than a count provided by the caller (i.e. 
FastTimer). This ensures that percentiles are calculated correctly without the 
need of having all counters fully synchronized.
    
    Master Issue: #2053
    
    
    Reviewers: Jia Zhai <[email protected]>, Sijie Guo <[email protected]>
    
    This closes #2054 from nicmichael/fast-codahale-bugfix, closes #2053
    
    (cherry picked from commit 848f8527f9d4745753b2f1e22ac1b8e981ea0d1d)
    Signed-off-by: Sijie Guo <[email protected]>
---
 .../bookkeeper/stats/codahale/FastSnapshot.java    | 19 ++++++++++++++++--
 .../bookkeeper/stats/codahale/FastTimerTest.java   | 23 ++++++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git 
a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
 
b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
index ee6466e..f6ceb88 100644
--- 
a/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
+++ 
b/bookkeeper-stats-providers/codahale-metrics-provider/src/main/java/org/apache/bookkeeper/stats/codahale/FastSnapshot.java
@@ -32,6 +32,7 @@ public class FastSnapshot extends Snapshot {
     private final long max;
     private final long sum;
     private final long cnt;
+    private final long pcnt;
     private final long[] values;
 
     @SuppressFBWarnings(
@@ -43,18 +44,19 @@ public class FastSnapshot extends Snapshot {
         this.max = max;
         this.sum = sum;
         this.cnt = cnt;
+        this.pcnt = values != null ? sumOf(values) : 0;
         this.values = values;
     }
 
     @Override
     public double getValue(double quantile) {
-        if (cnt == 0 || values == null) {
+        if (pcnt == 0 || values == null) {
             return 0;
         }
         long qcnt = 0;
         for (int i = 0; i < values.length; i++) {
             qcnt += values[i];
-            if (((double) qcnt) / ((double) cnt) > quantile) {
+            if (((double) qcnt) / ((double) pcnt) > quantile) {
                 return timer.getBucketBound(i);
             }
         }
@@ -105,4 +107,17 @@ public class FastSnapshot extends Snapshot {
         // values in this snapshot represent percentile buckets, but not 
discrete values
     }
 
+    /**
+     * Calculates the sum of values of an array.
+     * @param a an array of values
+     * @return the sum of all array values
+     */
+    private long sumOf(long[] a) {
+        long sum = 0;
+        for (long x : a) {
+            sum += x;
+        }
+        return sum;
+    }
+
 }
\ No newline at end of file
diff --git 
a/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
 
b/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
index 34ae5c3..b3a744f 100644
--- 
a/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
+++ 
b/bookkeeper-stats-providers/codahale-metrics-provider/src/test/java/org/apache/bookkeeper/stats/codahale/FastTimerTest.java
@@ -216,4 +216,27 @@ public class FastTimerTest {
         assertEquals("FastSnapshot.getMean()", 10, (int) 
Math.round(s.getMean() / 1000000));
     }
 
+    @Test
+    public void testSnapshotOutOfSync() {
+        FastTimer t = getMockedFastTimer(1, FastTimer.Buckets.fine);
+        t.update(t.getBucketBound(0) - 1, TimeUnit.NANOSECONDS); // add value 
to 1st bucket
+        t.update(t.getBucketBound(1) - 1, TimeUnit.NANOSECONDS); // add value 
to 2nd bucket
+        t.update(t.getBucketBound(2) - 1, TimeUnit.NANOSECONDS); // add value 
to 3rd bucket
+        incSec(); // advance mocked time to next second
+        Snapshot s1 = t.getSnapshot();
+        long[] buckets = new long[t.getNumberOfBuckets()];
+        buckets[0] = 1;
+        buckets[1] = 1;
+        buckets[2] = 1;
+        Snapshot s2 = new FastSnapshot(t,
+                t.getBucketBound(0) - 1,
+                t.getBucketBound(2) - 1,
+                t.getBucketBound(0) + t.getBucketBound(1) + 
t.getBucketBound(2) + 3,
+                4, // count (4) is out of sync with number of recorded events 
in buckets (3)
+                buckets);
+        assertEquals("FastSnapshot.getMin()", s1.getMin(), s2.getMin());
+        assertEquals("FastSnapshot.getMax()", s1.getMax(), s2.getMax());
+        assertEquals("FastSnapshot.getValue(0.95)", (long) s1.getValue(0.95), 
(long) s2.getValue(0.95));
+    }
+
 }

Reply via email to