This is an automated email from the ASF dual-hosted git repository.

taiyang-li pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new fb646bedb9 [CH] Support skewness aggregate function (#12294)
fb646bedb9 is described below

commit fb646bedb9dbbfd04d8cfb5871c64c9dac0ddd84
Author: Navaneeth Sujith <[email protected]>
AuthorDate: Sat Jun 27 06:26:44 2026 -0700

    [CH] Support skewness aggregate function (#12294)
    
    * [CH] Support skewness aggregate function
    
    * removed comment
    
    * added new test logic
    
    * [CH] Cover skewness with grouping keys and drop duplicate fallback check
    
    * fix by splitting line 108
---
 .../main/scala/org/apache/gluten/utils/CHExpressionUtil.scala  |  1 -
 .../gluten/execution/GlutenClickhouseCountDistinctSuite.scala  | 10 ++++++----
 .../aggregate_function_parser/SimpleStatisticsFunctions.cpp    |  7 +++++++
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 3bbb4a467f..e5bee2dc20 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -192,7 +192,6 @@ object CHExpressionUtil {
     URL_DECODE -> DefaultValidator(),
     URL_ENCODE -> DefaultValidator(),
     FORMAT_STRING -> FormatStringValidator(),
-    SKEWNESS -> DefaultValidator(),
     MAKE_YM_INTERVAL -> DefaultValidator(),
     MAP_ZIP_WITH -> DefaultValidator(),
     KURTOSIS -> DefaultValidator(),
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala
index 22a82a9439..7921350544 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala
@@ -100,12 +100,14 @@ class GlutenClickhouseCountDistinctSuite extends GlutenClickHouseWholeStageTrans
   }
 
   test("check count distinct with agg fallback") {
-    // skewness agg is not supported, will cause fallback
     val sql = "select count(distinct(a,b)) , skewness(b) from " +
       "values (0, null,1), (0,null,1), (1, 1,1), (2, 2, 1) ,(2,2,2),(3,3,3) as data(a,b,c)"
-    assertThrows[UnsupportedOperationException] {
-      spark.sql(sql).show
-    }
+    compareResultsAgainstVanillaSpark(sql, true, { _ => })
+
+    val sqlWithKeys = "select a, count(distinct(b)) , skewness(b) from " +
+      "values (0, null,1), (0,null,1), (1, 1,1), (2, 2, 1) ,(2,2,2),(3,3,3) as data(a,b,c) " +
+      "group by a"
+    compareResultsAgainstVanillaSpark(sqlWithKeys, true, { _ => })
   }
 
   test("check count distinct with expr fallback") {
diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
index bda181e0fa..93ed056cbf 100644
--- a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
+++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
@@ -63,7 +63,14 @@ public:
         return func_node;
     }
 };
+// for skewness
+struct SkewnessNameStruct
+{
+    static constexpr auto spark_name = "skewness";
+    static constexpr auto ch_name = "skewSamp";
+};
 
+static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<SkewnessNameStruct>> registerer_skewness;
 static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevNameStruct>> registerer_stddev;
 static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevSampNameStruct>> registerer_stddev_samp;
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to