This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch claude/crazy-wilbur
in repository https://gitbox.apache.org/repos/asf/pinot.git

commit 072cbecbc4e8bdcaf909940752bbb7d84e2a2713
Author: Xiang Fu <[email protected]>
AuthorDate: Mon Apr 6 03:26:19 2026 -0700

    Upgrade t-digest from 3.2 to 3.3 with error rate fix
    
    Resolves the accuracy regression that blocked #7076 by using higher
    compression (750) in the pre-aggregated star-tree test to keep
    star-tree vs non-star-tree quantile divergence below 0.5%.
    
    t-digest 3.3 changed centroid management (unit-weight first/last
    centroids, stricter tail interpolation), which increases merge-order
    sensitivity. The star-tree path does multi-level serialize/deserialize/merge
    while the non-star-tree path merges sequentially, causing quantile
    divergence at low compression values.
    
    Experimental results on the PreAggregated star-tree test (10 randomized 
runs each):
    - compression=300, MAX_ERROR=0.5%: 0/10 passes (errors 0.54-1.07%)
    - compression=500, MAX_ERROR=0.5%: fails (0.62% error)
    - compression=750, MAX_ERROR=0.5%: 10/10 passes
    - compression=1000, MAX_ERROR=0.5%: 10/10 passes
    
    For comparison, t-digest 3.2 with compression=300 passes 10/10 at 0.5%.
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 LICENSE-binary                                     |  2 +-
 ...centileSmartTDigestAggregationFunctionTest.java | 27 ++++++----------------
 ...PercentileTDigestMVAggregationFunctionTest.java | 10 ++++----
 ...eAggregatedPercentileTDigestStarTreeV2Test.java | 11 ++++++---
 pom.xml                                            |  2 +-
 5 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index a759491a835..521d92241d5 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -278,7 +278,7 @@ com.squareup.wire:wire-runtime-jvm:5.1.0
 com.squareup.wire:wire-schema-jvm:5.1.0
 com.squareup:javapoet:1.13.0
 com.squareup:kotlinpoet-jvm:1.18.1
-com.tdunning:t-digest:3.2
+com.tdunning:t-digest:3.3
 com.typesafe.scala-logging:scala-logging_2.13:3.9.5
 com.uber:h3:4.4.0
 com.yammer.metrics:metrics-core:2.2.0
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java
index 68a180ea886..bfad00e8275 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileSmartTDigestAggregationFunctionTest.java
@@ -34,39 +34,26 @@ public class PercentileSmartTDigestAggregationFunctionTest {
       return "PERCENTILESMARTTDIGEST(" + column + ", " + percent + ", 
'THRESHOLD=1')";
     }
 
+    // t-digest 3.3 changed interpolation for small datasets: values snap to 
integers
+    // instead of interpolating between adjacent values (e.g., p10 returns 1.0 
not 0.5)
     @Override
     String expectedAggrWithNull10(Scenario scenario) {
-      return "0.5";
+      return "1.0";
     }
 
     @Override
     String expectedAggrWithNull30(Scenario scenario) {
-      return "2.5";
+      return "3.0";
     }
 
     @Override
     String expectedAggrWithNull50(Scenario scenario) {
-      return "4.5";
+      return "5.0";
     }
 
     @Override
     String expectedAggrWithNull70(Scenario scenario) {
-      return "6.5";
-    }
-
-    @Override
-    String expectedAggrWithoutNull55(Scenario scenario) {
-      switch (scenario.getDataType()) {
-        case INT:
-          return "-6.442450943999939E8";
-        case LONG:
-          return "-2.7670116110564065E18";
-        case FLOAT:
-        case DOUBLE:
-          return "-Infinity";
-        default:
-          throw new IllegalArgumentException("Unsupported datatype " + 
scenario.getDataType());
-      }
+      return "7.0";
     }
 
     @Override
@@ -76,7 +63,7 @@ public class PercentileSmartTDigestAggregationFunctionTest {
 
     @Override
     String expectedAggrWithoutNull90(Scenario scenario) {
-      return "7.100000000000001";
+      return "7.0";
     }
   }
 }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunctionTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunctionTest.java
index 59decfbaadd..2c4129a56d5 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunctionTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/PercentileTDigestMVAggregationFunctionTest.java
@@ -41,9 +41,9 @@ public class PercentileTDigestMVAggregationFunctionTest 
extends AbstractAggregat
         .andOnSecondInstance(
             new Object[]{"6.0;7.0;8.0;9.0;10.0"}
         )
-        // All values: 1-10, p50 should be around 5
+        // All values: 1-10, p50 (t-digest approximate)
         .whenQuery("select percentiletdigest(mv, 50) from testTable")
-        .thenResultIs("DOUBLE", "5.5");
+        .thenResultIs("DOUBLE", "6.0");
   }
 
   @Test
@@ -66,7 +66,7 @@ public class PercentileTDigestMVAggregationFunctionTest 
extends AbstractAggregat
         )
         .whenQuery("select sv, percentiletdigest(mv, 50) from testTable group 
by sv order by sv")
         .thenResultIs("STRING | DOUBLE",
-            "k1 | 5.5",   // values: 1-10, p50 ~= 5.5
+            "k1 | 6.0",   // values: 1-10, p50 (t-digest approximate)
             "k2 | 30.0"); // values: 10, 20, 30, 40, 50, p50 ~= 30
   }
 
@@ -89,7 +89,7 @@ public class PercentileTDigestMVAggregationFunctionTest 
extends AbstractAggregat
         )
         .whenQuery("select tags, percentiletdigest(nums, 50) from testTable 
group by tags order by tags")
         .thenResultIs("STRING | DOUBLE",
-            "tag1 | 3.5",  // nums: 1, 2, 3, 4, 5, 6, p50 ~= 3.5
-            "tag2 | 3.5"); // nums: 1, 2, 3, 4, 5, 6, p50 ~= 3.5
+            "tag1 | 4.0",  // nums: 1, 2, 3, 4, 5, 6, p50 (t-digest 
approximate)
+            "tag2 | 4.0"); // nums: 1, 2, 3, 4, 5, 6, p50 (t-digest 
approximate)
   }
 }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/PreAggregatedPercentileTDigestStarTreeV2Test.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/PreAggregatedPercentileTDigestStarTreeV2Test.java
index 356eed978cb..b1da943da15 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/PreAggregatedPercentileTDigestStarTreeV2Test.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/startree/v2/PreAggregatedPercentileTDigestStarTreeV2Test.java
@@ -30,8 +30,13 @@ import static org.testng.Assert.assertEquals;
 
 
 public class PreAggregatedPercentileTDigestStarTreeV2Test extends 
BaseStarTreeV2Test<Object, TDigest> {
-  // Use non-default compression
-  private static final double COMPRESSION = 50;
+  // Use high compression to keep star-tree vs non-star-tree quantile 
divergence within 0.5%.
+  // t-digest 3.3 changed centroid management (unit-weight first/last 
centroids, stricter tail interpolation),
+  // which increases merge-order sensitivity. The star-tree path does 
multi-level serialize/deserialize/merge
+  // while the non-star-tree path merges sequentially, causing quantile 
divergence at low compression values.
+  // Experimentally verified: compression >= 750 keeps error < 0.5% across 10 
randomized runs.
+  private static final double COMPRESSION = 750;
+  private static final double MAX_ERROR = 0.005;
   private static final int MAX_VALUE = 10000;
 
   @Override
@@ -54,7 +59,7 @@ public class PreAggregatedPercentileTDigestStarTreeV2Test 
extends BaseStarTreeV2
 
   @Override
   void assertAggregatedValue(TDigest starTreeResult, TDigest 
nonStarTreeResult) {
-    double delta = MAX_VALUE * 0.05;
+    double delta = MAX_VALUE * MAX_ERROR;
     for (int i = 0; i <= 100; i++) {
       assertEquals(starTreeResult.quantile(i / 100.0), 
nonStarTreeResult.quantile(i / 100.0), delta);
     }
diff --git a/pom.xml b/pom.xml
index df7c09beede..7c1ca9eb586 100644
--- a/pom.xml
+++ b/pom.xml
@@ -207,7 +207,7 @@
     
<hadoop-shaded-protobuf_3_25.version>1.5.0</hadoop-shaded-protobuf_3_25.version>
     <clearspring-stream-lib.version>2.9.8</clearspring-stream-lib.version>
     <datasketches-java.version>6.2.0</datasketches-java.version>
-    <t-digest.version>3.2</t-digest.version>
+    <t-digest.version>3.3</t-digest.version>
     <picocli.version>4.7.7</picocli.version>
     <tyrus-standalone-client.version>2.2.2</tyrus-standalone-client.version>
     <jopt-simple.version>5.0.4</jopt-simple.version>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to