This is an automated email from the ASF dual-hosted git repository.
taiyang-li pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new fb646bedb9 [CH] Support skewness aggregate function (#12294)
fb646bedb9 is described below
commit fb646bedb9dbbfd04d8cfb5871c64c9dac0ddd84
Author: Navaneeth Sujith <[email protected]>
AuthorDate: Sat Jun 27 06:26:44 2026 -0700
[CH] Support skewness aggregate function (#12294)
* [CH] Support skewness aggregate function
* removed comment
* added new test logic
* [CH] Cover skewness with grouping keys and drop duplicate fallback check
* fix by splitting line 108
---
.../main/scala/org/apache/gluten/utils/CHExpressionUtil.scala | 1 -
.../gluten/execution/GlutenClickhouseCountDistinctSuite.scala | 10 ++++++----
.../aggregate_function_parser/SimpleStatisticsFunctions.cpp | 7 +++++++
3 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 3bbb4a467f..e5bee2dc20 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -192,7 +192,6 @@ object CHExpressionUtil {
URL_DECODE -> DefaultValidator(),
URL_ENCODE -> DefaultValidator(),
FORMAT_STRING -> FormatStringValidator(),
- SKEWNESS -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
MAP_ZIP_WITH -> DefaultValidator(),
KURTOSIS -> DefaultValidator(),
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala
index 22a82a9439..7921350544 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala
@@ -100,12 +100,14 @@ class GlutenClickhouseCountDistinctSuite extends GlutenClickHouseWholeStageTrans
}
test("check count distinct with agg fallback") {
- // skewness agg is not supported, will cause fallback
val sql = "select count(distinct(a,b)) , skewness(b) from " +
"values (0, null,1), (0,null,1), (1, 1,1), (2, 2, 1) ,(2,2,2),(3,3,3) as data(a,b,c)"
- assertThrows[UnsupportedOperationException] {
- spark.sql(sql).show
- }
+ compareResultsAgainstVanillaSpark(sql, true, { _ => })
+
+ val sqlWithKeys = "select a, count(distinct(b)) , skewness(b) from " +
+ "values (0, null,1), (0,null,1), (1, 1,1), (2, 2, 1) ,(2,2,2),(3,3,3) as data(a,b,c) " +
+ "group by a"
+ compareResultsAgainstVanillaSpark(sqlWithKeys, true, { _ => })
}
test("check count distinct with expr fallback") {
diff --git a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
index bda181e0fa..93ed056cbf 100644
--- a/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
+++ b/cpp-ch/local-engine/Parser/aggregate_function_parser/SimpleStatisticsFunctions.cpp
@@ -63,7 +63,14 @@ public:
return func_node;
}
};
+// for skewness
+struct SkewnessNameStruct
+{
+ static constexpr auto spark_name = "skewness";
+ static constexpr auto ch_name = "skewSamp";
+};
+static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<SkewnessNameStruct>> registerer_skewness;
static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevNameStruct>> registerer_stddev;
static const AggregateFunctionParserRegister<AggregateFunctionParserStddev<StddevSampNameStruct>> registerer_stddev_samp;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]