This is an automated email from the ASF dual-hosted git repository.
rui pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 628763fc3 [VL] Support regr_sxx and regr_syy aggregate functions for
Spark 3.4 (#5444)
628763fc3 is described below
commit 628763fc3a9f471e4b2c25c1d07efc968857be16
Author: Joey <[email protected]>
AuthorDate: Fri Apr 19 13:02:15 2024 +0800
[VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444)
---
.../execution/VeloxAggregateFunctionsSuite.scala | 38 ++++++++++++++++++++--
.../substrait/SubstraitToVeloxPlanValidator.cc | 3 +-
docs/velox-backend-support-progress.md | 3 ++
.../apache/gluten/expression/ExpressionNames.scala | 1 +
.../gluten/sql/shims/spark34/Spark34Shims.scala | 3 +-
5 files changed, 43 insertions(+), 5 deletions(-)
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
index 2573725a7..df0817410 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
@@ -432,14 +432,46 @@ abstract class VeloxAggregateFunctionsSuite extends
VeloxWholeStageTransformerSu
}
// Disable for Spark 3.5.
- testWithSpecifiedSparkVersion("regr_sxy", Some("3.4"), Some("3.4")) {
+ testWithSpecifiedSparkVersion("regr_sxy regr_sxx regr_syy", Some("3.4"),
Some("3.4")) {
runQueryAndCompare("""
- |select regr_sxy(l_partkey, l_suppkey) from lineitem;
+ |select regr_sxy(l_quantity, l_tax) from lineitem;
|""".stripMargin) {
checkGlutenOperatorMatch[HashAggregateExecTransformer]
}
runQueryAndCompare(
- "select regr_sxy(l_partkey, l_suppkey), count(distinct l_orderkey) from
lineitem") {
+ "select regr_sxy(l_quantity, l_tax), count(distinct l_orderkey) from
lineitem") {
+ df =>
+ {
+ assert(
+ getExecutedPlan(df).count(
+ plan => {
+ plan.isInstanceOf[HashAggregateExecTransformer]
+ }) == 4)
+ }
+ }
+ runQueryAndCompare("""
+ |select regr_sxx(l_quantity, l_tax) from lineitem;
+ |""".stripMargin) {
+ checkGlutenOperatorMatch[HashAggregateExecTransformer]
+ }
+ runQueryAndCompare(
+ "select regr_sxx(l_quantity, l_tax), count(distinct l_orderkey) from
lineitem") {
+ df =>
+ {
+ assert(
+ getExecutedPlan(df).count(
+ plan => {
+ plan.isInstanceOf[HashAggregateExecTransformer]
+ }) == 4)
+ }
+ }
+ runQueryAndCompare("""
+ |select regr_syy(l_quantity, l_tax) from lineitem;
+ |""".stripMargin) {
+ checkGlutenOperatorMatch[HashAggregateExecTransformer]
+ }
+ runQueryAndCompare(
+ "select regr_syy(l_quantity, l_tax), count(distinct l_orderkey) from
lineitem") {
df =>
{
assert(
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index f992b94c3..2a5857ae9 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -1106,7 +1106,8 @@ bool SubstraitToVeloxPlanValidator::validate(const
::substrait::AggregateRel& ag
"kurtosis",
"regr_slope",
"regr_intercept",
- "regr_sxy"};
+ "regr_sxy",
+ "regr_replacement"};
auto udfFuncs = UdfLoader::getInstance()->getRegisteredUdafNames();
diff --git a/docs/velox-backend-support-progress.md
b/docs/velox-backend-support-progress.md
index 5e81081b7..4b480529e 100644
--- a/docs/velox-backend-support-progress.md
+++ b/docs/velox-backend-support-progress.md
@@ -384,6 +384,9 @@ Gluten supports 199 functions. (Drag to right to see all
data types)
| regr_r2 | regr_r2 | regr_r2
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
| regr_intercept | regr_intercept | regr_intercept
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
| regr_slope | regr_slope | regr_slope
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
+| regr_sxy | regr_sxy | regr_sxy
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
+| regr_sxx | regr_sxx | regr_sxx
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
+| regr_syy | regr_syy | regr_syy
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
| skewness | skewness | skewness
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
| some | |
| | | | | | | |
| | | | | | | | |
| | | |
| std,stddev | stddev |
| S | | | | S | S | S | S
| S | | | | | | | |
| | | |
diff --git
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 851c81a8d..26f63bc75 100644
---
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -52,6 +52,7 @@ object ExpressionNames {
final val REGR_SLOPE = "regr_slope"
final val REGR_INTERCEPT = "regr_intercept"
final val REGR_SXY = "regr_sxy"
+ final val REGR_REPLACEMENT = "regr_replacement"
// Function names used by Substrait plan.
final val ADD = "add"
diff --git
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index fe06d7857..aa19e2a2c 100644
---
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -86,7 +86,8 @@ class Spark34Shims extends SparkShims {
Sig[RegrR2](ExpressionNames.REGR_R2),
Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
- Sig[RegrSXY](ExpressionNames.REGR_SXY)
+ Sig[RegrSXY](ExpressionNames.REGR_SXY),
+ Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]