This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new d659c80413 [GLUTEN-9287][VL] Enable array_compact function for Spark 3.4+ (#9349)
d659c80413 is described below

commit d659c804134f48c50d90a5932132fd489daae803
Author: dcoliversun <[email protected]>
AuthorDate: Fri Apr 18 01:42:03 2025 +0800

    [GLUTEN-9287][VL] Enable array_compact function for Spark 3.4+ (#9349)
---
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala   |  3 +++
 .../execution/ScalarFunctionsValidateSuite.scala    | 21 +++++++++++++++++++++
 .../apache/gluten/expression/ExpressionNames.scala  |  1 +
 .../gluten/sql/shims/spark34/Spark34Shims.scala     |  1 +
 .../gluten/sql/shims/spark35/Spark35Shims.scala     |  1 +
 5 files changed, 27 insertions(+)

diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index 9577b2b3f0..77cb2d68be 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -181,6 +181,9 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       function: ExpressionTransformer,
       expr: ArrayFilter): ExpressionTransformer = {
     expr.function match {
+      // Transformer for array_compact.
+      case LambdaFunction(_: IsNotNull, _, _) =>
+        GenericExpressionTransformer(ExpressionNames.ARRAY_COMPACT, Seq(argument), expr)
       case LambdaFunction(_, arguments, _) if arguments.size == 2 =>
         throw new GlutenNotSupportException(
           "filter on array with lambda using index argument is not supported yet")
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 45b14bfb2d..58c18400b6 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -196,6 +196,27 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
     }
   }
 
+  testWithMinSparkVersion("Test array_compact function", "3.4") {
+    withTempPath {
+      path =>
+        Seq[Array[String]](
+          Array("a", "b"),
+          Array(),
+          Array("a", "b", null.asInstanceOf[String]),
+          Array(null.asInstanceOf[String])
+        )
+          .toDF("arr")
+          .write
+          .parquet(path.getCanonicalPath)
+
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("tbl")
+
+        runQueryAndCompare("select arr, array_compact(arr) from tbl") {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
   test("Test round function") {
     runQueryAndCompare(
       "SELECT round(cast(l_orderkey as int), 2)" +
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index cb8ce2826b..c822161ea5 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -286,6 +286,7 @@ object ExpressionNames {
   final val ARRAY_PREPEND = "array_prepend"
   final val ARRAY_SIZE = "array_size"
   final val GET = "get"
+  final val ARRAY_COMPACT = "array_compact"
 
   // Map functions
   final val CREATE_MAP = "map"
diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index de0f72b82f..8ff5e17873 100644
--- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -105,6 +105,7 @@ class Spark34Shims extends SparkShims {
 
   override def runtimeReplaceableExpressionMappings: Seq[Sig] = {
     Seq(
+      Sig[ArrayCompact](ExpressionNames.ARRAY_COMPACT),
       Sig[ArraySize](ExpressionNames.ARRAY_SIZE),
       Sig[EqualNull](ExpressionNames.EQUAL_NULL),
       Sig[ILike](ExpressionNames.ILIKE),
diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 5284a4188b..43a3239a2a 100644
--- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -109,6 +109,7 @@ class Spark35Shims extends SparkShims {
 
   override def runtimeReplaceableExpressionMappings: Seq[Sig] = {
     Seq(
+      Sig[ArrayCompact](ExpressionNames.ARRAY_COMPACT),
       Sig[ArrayPrepend](ExpressionNames.ARRAY_PREPEND),
       Sig[ArraySize](ExpressionNames.ARRAY_SIZE),
       Sig[EqualNull](ExpressionNames.EQUAL_NULL),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to