This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 7e5b4fc7a [VL] Support regr_intercept aggregate function (#5273)
7e5b4fc7a is described below

commit 7e5b4fc7a1ec495e516add2d2b89fe9a1a1af5e4
Author: Joey <[email protected]>
AuthorDate: Wed Apr 3 17:22:33 2024 +0800

    [VL] Support regr_intercept aggregate function (#5273)
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala   |  1 +
 .../apache/gluten/utils/VeloxIntermediateData.scala  | 20 ++++++++++++--------
 .../execution/VeloxAggregateFunctionsSuite.scala     | 19 +++++++++++++++++++
 cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc |  3 ++-
 docs/velox-backend-support-progress.md               |  1 +
 .../apache/gluten/expression/ExpressionNames.scala   |  1 +
 .../gluten/sql/shims/spark34/Spark34Shims.scala      |  3 ++-
 7 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index d169d9ec5..f0f17b172 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -186,6 +186,7 @@ object CHExpressionUtil {
     KURTOSIS -> DefaultValidator(),
     REGR_R2 -> DefaultValidator(),
     REGR_SLOPE -> DefaultValidator(),
+    REGR_INTERCEPT -> DefaultValidator(),
     TO_UTC_TIMESTAMP -> DefaultValidator(),
     FROM_UTC_TIMESTAMP -> DefaultValidator()
   )
diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/VeloxIntermediateData.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/VeloxIntermediateData.scala
index 149634a47..b3bb62ec6 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/utils/VeloxIntermediateData.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/utils/VeloxIntermediateData.scala
@@ -42,8 +42,8 @@ object VeloxIntermediateData {
   // Skewness, Kurtosis
   private val veloxCentralMomentAggIntermediateDataOrder: Seq[Seq[String]] =
     Seq("n", "avg", "m2", "m3", "m4").map(Seq(_))
-  // RegrSlope
-  private val veloxRegrSlopeIntermediateDataOrder: Seq[Seq[String]] =
+  // RegrSlope, RegrIntercept
+  private val veloxRegrIntermediateDataOrder: Seq[Seq[String]] =
     Seq("ck", "n", "m2", "xAvg:avg", "yAvg").map(attr => attr.split(":").toSeq)
 
   // Agg functions with inconsistent types of intermediate data between Velox and Spark.
@@ -58,8 +58,8 @@ object VeloxIntermediateData {
   // Skewness, Kurtosis
   private val veloxCentralMomentAggIntermediateTypes: Seq[DataType] =
     Seq(LongType, DoubleType, DoubleType, DoubleType, DoubleType)
-  // RegrSlope
-  private val veloxRegrSlopeIntermediateTypes: Seq[DataType] =
+  // RegrSlope, RegrIntercept
+  private val veloxRegrIntermediateTypes: Seq[DataType] =
     Seq(DoubleType, LongType, DoubleType, DoubleType, DoubleType)
 
   def getAttrIndex(intermediateDataOrder: Seq[Seq[String]], attr: String): Int 
=
@@ -89,8 +89,10 @@ object VeloxIntermediateData {
       // certain versions of Spark, and SparkShim is not dependent on the 
backend-velox module. It
       // is not convenient to include Velox-specific logic in SparkShim. Using 
class names to match
       // aggFunc is reliable in this case, as there are no cases of duplicate 
names.
-      case _ if aggFunc.getClass.getSimpleName.equals("RegrSlope") =>
-        veloxRegrSlopeIntermediateDataOrder
+      case _
+          if aggFunc.getClass.getSimpleName.equals("RegrSlope") ||
+            aggFunc.getClass.getSimpleName.equals("RegrIntercept") =>
+        veloxRegrIntermediateDataOrder
       case _ =>
         aggFunc.aggBufferAttributes.map(_.name).map(Seq(_))
     }
@@ -172,8 +174,10 @@ object VeloxIntermediateData {
           Some(veloxVarianceIntermediateTypes)
         case _: Skewness | _: Kurtosis =>
           Some(veloxCentralMomentAggIntermediateTypes)
-        case _ if aggFunc.getClass.getSimpleName.equals("RegrSlope") =>
-          Some(veloxRegrSlopeIntermediateTypes)
+        case _
+            if aggFunc.getClass.getSimpleName.equals("RegrSlope") ||
+              aggFunc.getClass.getSimpleName.equals("RegrIntercept") =>
+          Some(veloxRegrIntermediateTypes)
         case _ if aggFunc.aggBufferAttributes.size > 1 =>
           Some(aggFunc.aggBufferAttributes.map(_.dataType))
         case _ => None
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
index 6d84d622f..3a181cfdd 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
@@ -409,6 +409,25 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu
     }
   }
 
+  testWithSpecifiedSparkVersion("regr_intercept", Some("3.4")) {
+    runQueryAndCompare("""
+                         |select regr_intercept(l_partkey, l_suppkey) from 
lineitem;
+                         |""".stripMargin) {
+      checkGlutenOperatorMatch[HashAggregateExecTransformer]
+    }
+    runQueryAndCompare(
+      "select regr_intercept(l_partkey, l_suppkey), count(distinct l_orderkey) 
from lineitem") {
+      df =>
+        {
+          assert(
+            getExecutedPlan(df).count(
+              plan => {
+                plan.isInstanceOf[HashAggregateExecTransformer]
+              }) == 4)
+        }
+    }
+  }
+
   test("first") {
     runQueryAndCompare(s"""
                           |select first(l_linenumber), first(l_linenumber, 
true) from lineitem;
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index a302701b4..43f808066 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -1081,7 +1081,8 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::AggregateRel& ag
       "approx_distinct",
       "skewness",
       "kurtosis",
-      "regr_slope"};
+      "regr_slope",
+      "regr_intercept"};
 
   auto udfFuncs = UdfLoader::getInstance()->getRegisteredUdafNames();
 
diff --git a/docs/velox-backend-support-progress.md b/docs/velox-backend-support-progress.md
index 28ac7218a..0b5015bc5 100644
--- a/docs/velox-backend-support-progress.md
+++ b/docs/velox-backend-support-progress.md
@@ -379,6 +379,7 @@ Gluten supports 199 functions. (Drag to right to see all data types)
 | min                           | min                    |                     
  | S      |                        |         |      | S     | S   | S    | S   
  | S      |      |           |        |         |      |        |          |   
    |      |        |      |
 | min_by                        |                        |                     
  | S      |                        |         |      |       |     |      |     
  |        |      |           |        |         |      |        |          |   
    |      |        |      |
 | regr_r2                       | regr_r2                | regr_r2             
  | S      |                        |         |      | S     | S   | S    | S   
  | S      |      |           |        |         |      |        |          |   
    |      |        |      |
+| regr_intercept                | regr_intercept         | regr_intercept      
  | S      |                        |         |      | S     | S   | S    | S   
  | S      |      |           |        |         |      |        |          |   
    |      |        |      |
 | regr_slope                    | regr_slope             | regr_slope          
  | S      |                        |         |      | S     | S   | S    | S   
  | S      |      |           |        |         |      |        |          |   
    |      |        |      |
 | skewness                      | skewness               | skewness            
  | S      |                        |         |      | S     | S   | S    | S   
  | S      |      |           |        |         |      |        |          |   
    |      |        |      |
 | some                          |                        |                     
  |        |                        |         |      |       |     |      |     
  |        |      |           |        |         |      |        |          |   
    |      |        |      |
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 6206db3f2..aeaff9e53 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -50,6 +50,7 @@ object ExpressionNames {
   final val SKEWNESS = "skewness"
   final val KURTOSIS = "kurtosis"
   final val REGR_SLOPE = "regr_slope"
+  final val REGR_INTERCEPT = "regr_intercept"
 
   // Function names used by Substrait plan.
   final val ADD = "add"
diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index 2178b1d17..2604ad929 100644
--- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -81,7 +81,8 @@ class Spark34Shims extends SparkShims {
   override def aggregateExpressionMappings: Seq[Sig] = {
     Seq(
       Sig[RegrR2](ExpressionNames.REGR_R2),
-      Sig[RegrSlope](ExpressionNames.REGR_SLOPE)
+      Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
+      Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT)
     )
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to